Commit 971ad4e4 authored by Linus Torvalds

Merge branch 'akpm' (fixes from Andrew Morton)

Merge misc fixes from Andrew Morton:
 "15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  MAINTAINERS: add IIO include files
  kernel/panic.c: update comments for print_tainted
  mem-hotplug: reset node present pages when hot-adding a new pgdat
  mem-hotplug: reset node managed pages when hot-adding a new pgdat
  mm/debug-pagealloc: correct freepage accounting and order resetting
  fanotify: fix notification of groups with inode & mount marks
  mm, compaction: prevent infinite loop in compact_zone
  mm: alloc_contig_range: demote pages busy message from warn to info
  mm/slab: fix unalignment problem on Malta with EVA due to slab merge
  mm/page_alloc: restrict max order of merging on isolated pageblock
  mm/page_alloc: move freepage counting logic to __free_one_page()
  mm/page_alloc: add freepage on isolate pageblock to correct buddy list
  mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype
  mm/compaction: skip the range until proper target pageblock is met
  zram: avoid kunmap_atomic() of a NULL pointer
parents b0ab3f19 8fe671fc
......@@ -4716,6 +4716,7 @@ L: linux-iio@vger.kernel.org
S: Maintained
F: drivers/iio/
F: drivers/staging/iio/
F: include/linux/iio/
IKANOS/ADI EAGLE ADSL USB DRIVER
M: Matthieu Castet <castet.matthieu@free.fr>
......
......@@ -560,6 +560,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
if (page_zero_filled(uncmem)) {
if (user_mem)
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
......
......@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
&fsnotify_mark_srcu);
}
/*
* We need to merge inode & vfsmount mark lists so that inode mark
* ignore masks are properly reflected for mount mark notifications.
* That's why this traversal is so complicated...
*/
while (inode_node || vfsmount_node) {
inode_group = vfsmount_group = NULL;
inode_group = NULL;
inode_mark = NULL;
vfsmount_group = NULL;
vfsmount_mark = NULL;
if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
......@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
vfsmount_group = vfsmount_mark->group;
}
if (inode_group > vfsmount_group) {
/* handle inode */
ret = send_to_group(to_tell, inode_mark, NULL, mask,
data, data_is, cookie, file_name);
/* we didn't use the vfsmount_mark */
vfsmount_group = NULL;
} else if (vfsmount_group > inode_group) {
ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
data, data_is, cookie, file_name);
if (inode_group && vfsmount_group) {
int cmp = fsnotify_compare_groups(inode_group,
vfsmount_group);
if (cmp > 0) {
inode_group = NULL;
} else {
ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
mask, data, data_is, cookie,
file_name);
inode_mark = NULL;
} else if (cmp < 0) {
vfsmount_group = NULL;
vfsmount_mark = NULL;
}
}
ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
data, data_is, cookie, file_name);
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out;
......
......@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;
/* compare two groups for sorting of marks lists */
extern int fsnotify_compare_groups(struct fsnotify_group *a,
struct fsnotify_group *b);
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
__u32 mask);
/* add a mark to an inode */
......
......@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
{
struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
int cmp;
mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
......@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
goto out;
}
if (mark->group->priority < lmark->group->priority)
continue;
if ((mark->group->priority == lmark->group->priority) &&
(mark->group < lmark->group))
cmp = fsnotify_compare_groups(lmark->group, mark->group);
if (cmp < 0)
continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
......
......@@ -209,6 +209,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
mark->ignored_mask = mask;
}
/*
* Sorting function for lists of fsnotify marks.
*
* Fanotify supports different notification classes (reflected as priority of
* notification group). Events shall be passed to notification groups in
* decreasing priority order. To achieve this marks in notification lists for
* inodes and vfsmounts are sorted so that priorities of corresponding groups
* are descending.
*
* Furthermore correct handling of the ignore mask requires processing inode
* and vfsmount marks of each group together. Using the group address as
* further sort criterion provides a unique sorting order and thus we can
* merge inode and vfsmount lists of marks in linear time and find groups
* present in both lists.
*
* A return value of 1 signifies that b has priority over a.
* A return value of 0 signifies that the two marks have to be handled together.
* A return value of -1 signifies that a has priority over b.
*/
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	/* Same group (or both absent): the marks are handled together. */
	if (a == b)
		return 0;

	/* Exactly one side is absent: the present group takes precedence. */
	if (!a || !b)
		return a ? -1 : 1;

	/* Different notification classes: the higher priority goes first. */
	if (a->priority != b->priority)
		return (a->priority > b->priority) ? -1 : 1;

	/* Equal priority: fall back to the group address as a tie-breaker. */
	return (a < b) ? 1 : -1;
}
/*
* Attach an initialized mark to a given group and fs object.
* These marks may be used for the fsnotify backend to determine which
......
......@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct mount *m = real_mount(mnt);
struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
int cmp;
mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
......@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
goto out;
}
if (mark->group->priority < lmark->group->priority)
continue;
if ((mark->group->priority == lmark->group->priority) &&
(mark->group < lmark->group))
cmp = fsnotify_compare_groups(lmark->group, mark->group);
if (cmp < 0)
continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
......
......@@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
extern unsigned long free_all_bootmem(void);
extern void reset_node_managed_pages(pg_data_t *pgdat);
extern void reset_all_zones_managed_pages(void);
extern void free_bootmem_node(pg_data_t *pgdat,
......
......@@ -431,6 +431,15 @@ struct zone {
*/
int nr_migrate_reserve_block;
#ifdef CONFIG_MEMORY_ISOLATION
/*
* Number of isolated pageblocks. It is used to solve the incorrect
* freepage counting problem caused by racily retrieving the migratetype
* of a pageblock. Protected by zone->lock.
*/
unsigned long nr_isolate_pageblock;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
......
......@@ -2,6 +2,10 @@
#define __LINUX_PAGEISOLATION_H
#ifdef CONFIG_MEMORY_ISOLATION
static inline bool has_isolate_pageblock(struct zone *zone)
{
	/* True while the zone's isolated-pageblock counter is non-zero. */
	return zone->nr_isolate_pageblock != 0;
}
static inline bool is_migrate_isolate_page(struct page *page)
{
return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
......@@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
return migratetype == MIGRATE_ISOLATE;
}
#else
static inline bool has_isolate_pageblock(struct zone *zone)
{
/* Stub used when CONFIG_MEMORY_ISOLATION is not defined: always false. */
return false;
}
static inline bool is_migrate_isolate_page(struct page *page)
{
return false;
......
......@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
* 'I' - Working around severe firmware bug.
* 'O' - Out-of-tree module has been loaded.
* 'E' - Unsigned module has been loaded.
* 'L' - A soft lockup has previously occurred.
*
* The string is overwritten by the next call to print_tainted().
*/
......
......@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
static int reset_managed_pages_done __initdata;
static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
void reset_node_managed_pages(pg_data_t *pgdat)
{
struct zone *z;
if (reset_managed_pages_done)
return;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->managed_pages = 0;
}
......@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
{
struct pglist_data *pgdat;
if (reset_managed_pages_done)
return;
for_each_online_pgdat(pgdat)
reset_node_managed_pages(pgdat);
reset_managed_pages_done = 1;
}
......
......@@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
block_end_pfn = min(block_end_pfn, end_pfn);
/*
* pfn could pass block_end_pfn if the isolated freepage is larger
* than pageblock order. In this case, we adjust the scanning range
* to the correct one.
*/
if (pfn >= block_end_pfn) {
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
block_end_pfn = min(block_end_pfn, end_pfn);
}
if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
break;
......@@ -1029,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
}
acct_isolated(zone, cc);
/* Record where migration scanner will be restarted */
cc->migrate_pfn = low_pfn;
/*
* Record where migration scanner will be restarted. If we end up in
* the same pageblock as the free scanner, make the scanners fully
* meet so that compact_finished() terminates compaction.
*/
cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}
......
......@@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
/*
* in mm/page_alloc.c
*/
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
*
* 1) Any buddy B1 will have an order O twin B2 which satisfies
* the following equation:
* B2 = B1 ^ (1 << O)
* For example, if the starting buddy (buddy2) is #8 its order
* 1 buddy is #10:
* B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
*
* 2) Any buddy B will have an order O+1 parent P which
* satisfies the following equation:
* P = B & ~(1 << O)
*
* Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	/*
	 * Flip bit @order of @page_idx to get the index of its order-@order
	 * buddy. The shift is done on an unsigned long so that the mask has
	 * the same width as the index: a plain int "1 << order" is undefined
	 * for order 31 and would sign-extend into the upper bits.
	 */
	return page_idx ^ (1UL << order);
}
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __free_pages_bootmem(struct page *page, unsigned int order);
extern void prep_compound_page(struct page *page, unsigned long order);
#ifdef CONFIG_MEMORY_FAILURE
......
......@@ -31,6 +31,7 @@
#include <linux/stop_machine.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/tlbflush.h>
......@@ -1066,6 +1067,16 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
/*
 * Zero present_pages for every zone of @pgdat, then zero the node-wide
 * node_present_pages counter.
 */
static void reset_node_present_pages(pg_data_t *pgdat)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++)
		pgdat->node_zones[zid].present_pages = 0;

	pgdat->node_present_pages = 0;
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
......@@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
build_all_zonelists(pgdat, NULL);
mutex_unlock(&zonelists_mutex);
/*
* zone->managed_pages is set to an approximate value in
* free_area_init_core(), which would cause
* /sys/devices/system/node/nodeX/meminfo to report wrong data.
* So reset it to 0 before any memory is onlined.
*/
reset_node_managed_pages(pgdat);
/*
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
*/
reset_node_present_pages(pgdat);
return pgdat;
}
......
......@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
static int reset_managed_pages_done __initdata;
static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
void reset_node_managed_pages(pg_data_t *pgdat)
{
struct zone *z;
if (reset_managed_pages_done)
return;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->managed_pages = 0;
}
......@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
{
struct pglist_data *pgdat;
if (reset_managed_pages_done)
return;
for_each_online_pgdat(pgdat)
reset_node_managed_pages(pgdat);
reset_managed_pages_done = 1;
}
......
......@@ -466,29 +466,6 @@ static inline void rmv_page_order(struct page *page)
set_page_private(page, 0);
}
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
*
* 1) Any buddy B1 will have an order O twin B2 which satisfies
* the following equation:
* B2 = B1 ^ (1 << O)
* For example, if the starting buddy (buddy2) is #8 its order
* 1 buddy is #10:
* B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
*
* 2) Any buddy B will have an order O+1 parent P which
* satisfies the following equation:
* P = B & ~(1 << O)
*
* Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	/*
	 * Flip bit @order of @page_idx to get the index of its order-@order
	 * buddy. The shift is done on an unsigned long so that the mask has
	 * the same width as the index: a plain int "1 << order" is undefined
	 * for order 31 and would sign-extend into the upper bits.
	 */
	return page_idx ^ (1UL << order);
}
/*
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
......@@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page,
unsigned long combined_idx;
unsigned long uninitialized_var(buddy_idx);
struct page *buddy;
int max_order = MAX_ORDER;
VM_BUG_ON(!zone_is_initialized(zone));
......@@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page,
return;
VM_BUG_ON(migratetype == -1);
if (is_migrate_isolate(migratetype)) {
/*
* We restrict max order of merging to prevent merge
* between freepages on isolate pageblock and normal
* pageblock. Without this, pageblock isolation
* could cause incorrect freepage accounting.
*/
max_order = min(MAX_ORDER, pageblock_order + 1);
} else {
__mod_zone_freepage_state(zone, 1 << order, migratetype);
}
page_idx = pfn & ((1 << MAX_ORDER) - 1);
page_idx = pfn & ((1 << max_order) - 1);
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
while (order < MAX_ORDER-1) {
while (order < max_order - 1) {
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order))
......@@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page,
*/
if (page_is_guard(buddy)) {
clear_page_guard_flag(buddy);
set_page_private(page, 0);
set_page_private(buddy, 0);
if (!is_migrate_isolate(migratetype)) {
__mod_zone_freepage_state(zone, 1 << order,
migratetype);
}
} else {
list_del(&buddy->lru);
zone->free_area[order].nr_free--;
......@@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
mt = get_freepage_migratetype(page);
if (unlikely(has_isolate_pageblock(zone)))
mt = get_pageblock_migratetype(page);
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
if (likely(!is_migrate_isolate_page(page))) {
__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
if (is_migrate_cma(mt))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
}
} while (--to_free && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
......@@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone,
if (nr_scanned)
__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
spin_unlock(&zone->lock);
}
......@@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order)
}
EXPORT_SYMBOL_GPL(split_page);
static int __isolate_free_page(struct page *page, unsigned int order)
int __isolate_free_page(struct page *page, unsigned int order)
{
unsigned long watermark;
struct zone *zone;
......@@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, false)) {
pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
outer_start, end);
pr_info("%s: [%lx, %lx) PFNs busy\n",
__func__, outer_start, end);
ret = -EBUSY;
goto done;
}
/* Grab isolated pages from freelists. */
outer_end = isolate_freepages_range(&cc, outer_start, end);
if (!outer_end) {
......
......@@ -60,6 +60,7 @@ int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
int migratetype = get_pageblock_migratetype(page);
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
zone->nr_isolate_pageblock++;
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
__mod_zone_freepage_state(zone, -nr_pages, migratetype);
......@@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
struct zone *zone;
unsigned long flags, nr_pages;
struct page *isolated_page = NULL;
unsigned int order;
unsigned long page_idx, buddy_idx;
struct page *buddy;
zone = page_zone(page);
spin_lock_irqsave(&zone->lock, flags);
if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
goto out;
/*
* Because a freepage on an isolated pageblock is restricted from
* merging beyond pageblock_order due to the freepage counting problem,
* it is possible that there is a free buddy page.
* move_freepages_block() doesn't take care of merging, so we need
* another approach in order to merge them. Isolating and then freeing
* the page will cause these pages to be merged.
*/
if (PageBuddy(page)) {
order = page_order(page);
if (order >= pageblock_order) {
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
set_page_refcounted(page);
isolated_page = page;
}
}
}
/*
* If we isolate freepage with more than pageblock_order, there
* should be no freepage in the range, so we could avoid costly
* pageblock scanning for freepage moving.
*/
if (!isolated_page) {
nr_pages = move_freepages_block(zone, page, migratetype);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
}
set_pageblock_migratetype(page, migratetype);
zone->nr_isolate_pageblock--;
out:
spin_unlock_irqrestore(&zone->lock, flags);
if (isolated_page)
__free_pages(isolated_page, order);
}
static inline struct page *
......
......@@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
if (s->size - size >= sizeof(void *))
continue;
if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;
return s;
}
return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment