diff -drupN a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	2018-08-06 17:23:04.000000000 +0300
+++ b/mm/page_alloc.c	2022-06-12 05:28:14.000000000 +0300
@@ -253,10 +253,22 @@ compound_page_dtor * const compound_page
 #endif
 };
 
+/*
+ * Try to keep at least this much lowmem free. Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
 int watermark_scale_factor = 10;
 
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory, to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
+
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
@@ -1853,14 +1865,38 @@ static int fallbacks[MIGRATE_TYPES][4] =
 };
 
 #ifdef CONFIG_CMA
-static struct page *__rmqueue_cma_fallback(struct zone *zone,
-					unsigned int order)
+static struct page *__rmqueue_cma_prev(struct zone *zone,
+					unsigned int order, int migratetype)
 {
-	return __rmqueue_smallest(zone, order, MIGRATE_CMA);
+	int target_migratetype = migratetype;
+	long free_cma = zone_page_state(zone, NR_FREE_CMA_PAGES);
+	struct page *page = NULL;
+
+	if (free_cma) {
+		long free_pages = zone_page_state(zone, NR_FREE_PAGES);
+		long high_mark = high_wmark_pages(zone)*3/2;
+		long low_mark = low_wmark_pages(zone)/2;
+
+		/* If free_cma is below low_mark, do not allocate from MIGRATE_CMA:
+		 * the race between alloc_contig_range() and MIGRATE_MOVABLE page
+		 * allocation would make test_pages_isolated() fail with -EBUSY.
+		 */
+		if ((free_pages - free_cma < high_mark) &&
+			(free_cma > low_mark))
+			target_migratetype = MIGRATE_CMA;
+	}
+
+	page = __rmqueue_smallest(zone, order, target_migratetype);
+	if (unlikely(!page && target_migratetype == MIGRATE_CMA)) {
+		page = __rmqueue_smallest(zone, order, migratetype);
+		target_migratetype = migratetype;
+	}
+
+	return page;
 }
 #else
-static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
-					unsigned int order) { return NULL; }
+static inline struct page *__rmqueue_cma_prev(struct zone *zone,
+					unsigned int order, int migratetype) { return NULL; }
 #endif
 
 /*
@@ -2050,6 +2086,11 @@ static void reserve_highatomic_pageblock
 	int mt;
 	unsigned long max_managed, flags;
 
+	/* If 1% of the zone is smaller than pageblock_nr_pages, do not
+	 * reserve a pageblock for high-order atomic allocations. */
+	if ((zone->managed_pages / 100) < pageblock_nr_pages)
+		return;
+
 	/*
 	 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
	 * Check is race-prone but harmless.
@@ -2206,14 +2247,15 @@ static struct page *__rmqueue(struct zon
 {
 	struct page *page;
 
-	page = __rmqueue_smallest(zone, order, migratetype);
-	if (unlikely(!page)) {
-		if (migratetype == MIGRATE_MOVABLE)
-			page = __rmqueue_cma_fallback(zone, order);
+#ifdef CONFIG_CMA
+	if (migratetype == MIGRATE_MOVABLE)
+		page = __rmqueue_cma_prev(zone, order, migratetype);
+	else
+#endif
+		page = __rmqueue_smallest(zone, order, migratetype);
 
-		if (!page)
-			page = __rmqueue_fallback(zone, order, migratetype);
-	}
+	if (unlikely(!page))
+		page = __rmqueue_fallback(zone, order, migratetype);
 
 	trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
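The __rmqueue_cma_prev() hunks above make MIGRATE_MOVABLE requests draw from CMA pageblocks first, but only while non-CMA free memory has dropped below 1.5x the high watermark and more than half of the low watermark's worth of CMA pages remains. The following is a minimal standalone sketch of that decision rule, not kernel code: prefer_cma() and the literal page counts in main() are hypothetical stand-ins for zone_page_state() and the zone watermark helpers.

#include <stdbool.h>
#include <stdio.h>

/*
 * Standalone sketch of the selection rule used by __rmqueue_cma_prev()
 * above. All values are page counts; in the kernel they come from
 * zone_page_state() and the zone watermark helpers.
 */
static bool prefer_cma(long free_pages, long free_cma,
		       long high_wmark, long low_wmark)
{
	long high_mark = high_wmark * 3 / 2;	/* 1.5x the high watermark */
	long low_mark = low_wmark / 2;		/* half the low watermark */

	if (!free_cma)
		return false;

	/*
	 * Prefer CMA only while non-CMA free memory is getting tight and
	 * enough CMA pages remain that alloc_contig_range() can still
	 * isolate a contiguous block without racing to -EBUSY.
	 */
	return (free_pages - free_cma < high_mark) && (free_cma > low_mark);
}

int main(void)
{
	/* hypothetical zone: 16384 free pages, 4096 of them on CMA */
	printf("prefer CMA: %d\n", prefer_cma(16384, 4096, 12288, 6144));
	return 0;
}

If the CMA attempt then fails, the patched __rmqueue_cma_prev() retries with the caller's original migratetype, so movable allocations never fail merely because CMA was tried first.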
@@ -2341,8 +2383,9 @@ static void drain_pages(unsigned int cpu
  * The CPU has to be pinned. When zone parameter is non-NULL, spill just
  * the single zone's pages.
  */
-void drain_local_pages(struct zone *zone)
+void drain_local_pages(void *z)
 {
+	struct zone *zone = (struct zone *)z;
 	int cpu = smp_processor_id();
 
 	if (zone)
@@ -2402,8 +2445,7 @@ void drain_all_pages(struct zone *zone)
 		else
 			cpumask_clear_cpu(cpu, &cpus_with_pcps);
 	}
-	on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
-								zone, 1);
+	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, zone, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -6663,6 +6705,7 @@ static void setup_per_zone_lowmem_reserv
 static void __setup_per_zone_wmarks(void)
 {
 	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
 	struct zone *zone;
 	unsigned long flags;
@@ -6674,11 +6717,14 @@ static void __setup_per_zone_wmarks(void
 	}
 
 	for_each_zone(zone) {
-		u64 tmp;
+		u64 min, low;
 
 		spin_lock_irqsave(&zone->lock, flags);
-		tmp = (u64)pages_min * zone->managed_pages;
-		do_div(tmp, lowmem_pages);
+		min = (u64)pages_min * zone->managed_pages;
+		do_div(min, lowmem_pages);
+		low = (u64)pages_low * zone->managed_pages;
+		do_div(low, vm_total_pages);
+
 		if (is_highmem(zone)) {
 			/*
 			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -6699,7 +6745,7 @@ static void __setup_per_zone_wmarks(void
 			 * If it's a lowmem zone, reserve a number of pages
 			 * proportionate to the zone's size.
 			 */
-			zone->watermark[WMARK_MIN] = tmp;
+			zone->watermark[WMARK_MIN] = min;
 		}
 
 		/*
@@ -6707,12 +6753,14 @@ static void __setup_per_zone_wmarks(void
 		 * scale factor in proportion to available memory, but
 		 * ensure a minimum size on small systems.
 		 */
-		tmp = max_t(u64, tmp >> 2,
+		min = max_t(u64, min >> 2,
 			    mult_frac(zone->managed_pages,
 				      watermark_scale_factor, 10000));
 
-		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
-		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
+		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) +
+					low + min;
+		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+					low + min * 2;
 
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
@@ -6793,7 +6841,7 @@ core_initcall(init_per_zone_wmark_min)
 /*
  * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
  *	that we can call two helper functions whenever min_free_kbytes
- *	changes.
+ *	or extra_free_kbytes changes.
 */
 int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *length, loff_t *ppos)
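For reference, here is a standalone sketch of the watermark arithmetic in the modified __setup_per_zone_wmarks(): the two kbytes sysctls are converted to pages, each zone receives a proportional share, and extra_free_kbytes lifts only the low and high watermarks. The extra_free_kbytes value, the zone sizes, and the plain variables standing in for the kernel's managed_pages, lowmem_pages and vm_total_pages are all hypothetical.

#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */

int main(void)
{
	/* sysctl values: min_free_kbytes and a hypothetical extra_free_kbytes */
	long min_free_kbytes = 1024;
	long extra_free_kbytes = 65536;

	/* convert kbytes to pages, as __setup_per_zone_wmarks() does */
	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
	unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);

	/* hypothetical single 1 GiB zone covering all of lowmem */
	unsigned long managed_pages = 262144;
	unsigned long lowmem_pages = 262144;
	unsigned long vm_total_pages = 262144;
	unsigned long watermark_scale_factor = 10;

	/* per-zone share of min_free_kbytes and extra_free_kbytes */
	unsigned long min = (unsigned long)((unsigned long long)pages_min *
					    managed_pages / lowmem_pages);
	unsigned long low = (unsigned long)((unsigned long long)pages_low *
					    managed_pages / vm_total_pages);

	unsigned long wmark_min = min;

	/* gap term: at least min/4, but no less than the scale-factor share */
	unsigned long gap = min >> 2;
	unsigned long scaled = managed_pages * watermark_scale_factor / 10000;
	if (scaled > gap)
		gap = scaled;

	/* extra_free_kbytes lifts WMARK_LOW and WMARK_HIGH, not WMARK_MIN */
	unsigned long wmark_low = wmark_min + low + gap;
	unsigned long wmark_high = wmark_min + low + gap * 2;

	printf("min=%lu low=%lu high=%lu (pages)\n",
	       wmark_min, wmark_low, wmark_high);
	return 0;
}

With these example numbers the min watermark stays at 256 pages while the low and high watermarks rise by the 16384-page extra_free_kbytes share, so kswapd starts reclaiming earlier but normal allocations can still proceed down to the unchanged min watermark.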