
Commit dc2da7b

Baoquan He authored and torvalds committed
mm: memmap defer init doesn't work as expected
VMware observed a performance regression during memmap init on their
platform, and bisected it to commit 73a6e47 ("mm: memmap_init: iterate
over memblock regions rather that check each PFN").

Before the commit:

  [0.033176] Normal zone: 1445888 pages used for memmap
  [0.033176] Normal zone: 89391104 pages, LIFO batch:63
  [0.035851] ACPI: PM-Timer IO Port: 0x448

With the commit:

  [0.026874] Normal zone: 1445888 pages used for memmap
  [0.026875] Normal zone: 89391104 pages, LIFO batch:63
  [2.028450] ACPI: PM-Timer IO Port: 0x448

The root cause is that the current memmap defer init doesn't work as
expected.

Before, memmap_init_zone() was used to do memmap init of one whole zone:
it initialized all low zones of a NUMA node, but deferred memmap init of
the last zone in that node.  However, since commit 73a6e47,
memmap_init() iterates over the memblock regions inside one zone and
calls memmap_init_zone() to do memmap init for each region.

E.g. on VMware's system, the memory layout is as below; there are two
memory regions in node 2.  The current code mistakenly initializes the
whole 1st region [mem 0xab00000000-0xfcffffffff], then defers init so
that only one memory section is initialized in the 2nd region
[mem 0x10000000000-0x1033fffffff].  In fact, only one memory section's
memmap should be initialized eagerly in the whole deferred zone, which
is where the extra time is spent.

  [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff]
  [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff]
  [ 0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff]
  [ 0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff]
  [ 0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff]
  [ 0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff]

Now, add a parameter 'zone_end_pfn' to memmap_init_zone() to pass down
the real zone end pfn, so that defer_init() can use it to judge whether
deferral should be applied zone-wide.

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Fixes: commit 73a6e47 ("mm: memmap_init: iterate over memblock regions rather that check each PFN")
Signed-off-by: Baoquan He <[email protected]>
Reported-by: Rahul Gopakumar <[email protected]>
Reviewed-by: Mike Rapoport <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
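For illustration, a minimal, self-contained sketch of the deferral decision the fix restores. This is not the kernel source: first_deferred_pfn is a file-local static here instead of a field in the per-node pgdat, node_end_pfn stands in for pgdat_end_pfn(NODE_DATA(nid)), and the PAGES_PER_SECTION value is an assumed x86_64 one.

/*
 * Illustrative sketch only -- not mm/page_alloc.c.
 */
#include <stdbool.h>
#include <limits.h>

#define PAGES_PER_SECTION	(1UL << 15)	/* assumption: x86_64 value */

static unsigned long first_deferred_pfn = ULONG_MAX;	/* per-node in the kernel */

static bool defer_init(unsigned long pfn, unsigned long zone_end_pfn,
		       unsigned long node_end_pfn)
{
	static unsigned long prev_end_pfn, nr_initialised;

	/* A new zone resets the count of eagerly initialised pages. */
	if (prev_end_pfn != zone_end_pfn) {
		prev_end_pfn = zone_end_pfn;
		nr_initialised = 0;
	}

	/* Only the last zone of the node is ever deferred. */
	if (zone_end_pfn < node_end_pfn)
		return false;

	/* Once a deferral point is recorded, skip everything after it. */
	if (first_deferred_pfn != ULONG_MAX)
		return true;

	/* Eagerly init one section, then defer the rest of the zone. */
	if (++nr_initialised > PAGES_PER_SECTION &&
	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
		first_deferred_pfn = pfn;
		return true;
	}
	return false;
}

If callers pass each memblock region's end instead of the zone end, a region that ends before the node end looks like a low zone and is never deferred at all; on the layout above, the entire first region of node 2 was initialized eagerly, which is the slowdown in the boot log.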
1 parent 6d87d0e commit dc2da7b

4 files changed, +11 -8 lines changed


arch/ia64/mm/init.c

Lines changed: 2 additions & 2 deletions
@@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
 
 	if (map_start < map_end)
 		memmap_init_zone((unsigned long)(map_end - map_start),
-				 args->nid, args->zone, page_to_pfn(map_start),
+				 args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
 				 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
 	return 0;
 }
@@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
 	     unsigned long start_pfn)
 {
 	if (!vmem_map) {
-		memmap_init_zone(size, nid, zone, start_pfn,
+		memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
 				 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
 	} else {
 		struct page *start;

include/linux/mm.h

Lines changed: 3 additions & 2 deletions
@@ -2439,8 +2439,9 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
-		enum meminit_context, struct vmem_altmap *, int migratetype);
+extern void memmap_init_zone(unsigned long, int, unsigned long,
+		unsigned long, unsigned long, enum meminit_context,
+		struct vmem_altmap *, int migratetype);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);

mm/memory_hotplug.c

Lines changed: 1 addition & 1 deletion
@@ -713,7 +713,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	 * expects the zone spans the pfn range. All the pages in the range
 	 * are reserved so nobody should be touching them so we should be safe
 	 */
-	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
 			 MEMINIT_HOTPLUG, altmap, migratetype);
 
 	set_zone_contiguous(zone);

mm/page_alloc.c

Lines changed: 5 additions & 3 deletions
@@ -423,6 +423,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
 		return false;
 
+	if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+		return true;
 	/*
 	 * We start only with one section of pages, more pages are added as
 	 * needed until the rest of deferred pages are initialized.
@@ -6116,7 +6118,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
  * zone stats (e.g., nr_isolate_pageblock) are touched.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-		unsigned long start_pfn,
+		unsigned long start_pfn, unsigned long zone_end_pfn,
 		enum meminit_context context,
 		struct vmem_altmap *altmap, int migratetype)
 {
@@ -6152,7 +6154,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		if (context == MEMINIT_EARLY) {
 			if (overlap_memmap_init(zone, &pfn))
 				continue;
-			if (defer_init(nid, pfn, end_pfn))
+			if (defer_init(nid, pfn, zone_end_pfn))
 				break;
 		}
 
@@ -6266,7 +6268,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
 
 		if (end_pfn > start_pfn) {
 			size = end_pfn - start_pfn;
-			memmap_init_zone(size, nid, zone, start_pfn,
+			memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
 					 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
 		}
 	}
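To make the last hunk concrete, here is a simplified caller-side sketch of a zone split across several regions. The names init_region(), nr_regions, region_start[] and region_end[] are hypothetical stand-ins for the memblock iterator and memmap_init_zone(); this is not the kernel's memmap_init().

/* Illustrative sketch only -- not mm/page_alloc.c. */
static void init_region(unsigned long size, unsigned long start_pfn,
			unsigned long zone_end_pfn)
{
	/* Stand-in for memmap_init_zone(); parameters intentionally unused here. */
	(void)size;
	(void)start_pfn;
	(void)zone_end_pfn;
}

static void init_zone_regions(unsigned long zone_start_pfn,
			      unsigned long zone_end_pfn,
			      unsigned long nr_regions,
			      const unsigned long region_start[],
			      const unsigned long region_end[])
{
	unsigned long i;

	for (i = 0; i < nr_regions; i++) {
		/* Clip each region to the zone being initialized. */
		unsigned long start = region_start[i] > zone_start_pfn ?
				      region_start[i] : zone_start_pfn;
		unsigned long end = region_end[i] < zone_end_pfn ?
				    region_end[i] : zone_end_pfn;

		if (end <= start)
			continue;

		/*
		 * Pass the zone-wide end, not the clipped region end, so the
		 * deferral decision is made against the whole zone.
		 */
		init_region(end - start, start, zone_end_pfn);
	}
}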
