LCOV - Real - mm/compaction.c

LCOV - code coverage report

Current view:	top level - mm - compaction.c (source / functions)		Hit	Total	Coverage
Test:	Real	Lines:	152	802	19.0 %
Date:	2020-10-17 15:46:43	Functions:	5	54	9.3 %
Legend:	Neither, QEMU, Real, Both	Branches:	0	0	-

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: GPL-2.0
       2                 :            : /*
       3                 :            :  * linux/mm/compaction.c
       4                 :            :  *
       5                 :            :  * Memory compaction for the reduction of external fragmentation. Note that
       6                 :            :  * this heavily depends upon page migration to do all the real heavy
       7                 :            :  * lifting
       8                 :            :  *
       9                 :            :  * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
      10                 :            :  */
      11                 :            : #include <linux/cpu.h>
      12                 :            : #include <linux/swap.h>
      13                 :            : #include <linux/migrate.h>
      14                 :            : #include <linux/compaction.h>
      15                 :            : #include <linux/mm_inline.h>
      16                 :            : #include <linux/sched/signal.h>
      17                 :            : #include <linux/backing-dev.h>
      18                 :            : #include <linux/sysctl.h>
      19                 :            : #include <linux/sysfs.h>
      20                 :            : #include <linux/page-isolation.h>
      21                 :            : #include <linux/kasan.h>
      22                 :            : #include <linux/kthread.h>
      23                 :            : #include <linux/freezer.h>
      24                 :            : #include <linux/page_owner.h>
      25                 :            : #include <linux/psi.h>
      26                 :            : #include "internal.h"
      27                 :            : 
      28                 :            : #ifdef CONFIG_COMPACTION
      29                 :            : static inline void count_compact_event(enum vm_event_item item)
      30                 :            : {
      31                 :            :         count_vm_event(item);
      32                 :            : }
      33                 :            : 
      34                 :            : static inline void count_compact_events(enum vm_event_item item, long delta)
      35                 :            : {
      36                 :            :         count_vm_events(item, delta);
      37                 :            : }
      38                 :            : #else
      39                 :            : #define count_compact_event(item) do { } while (0)
      40                 :            : #define count_compact_events(item, delta) do { } while (0)
      41                 :            : #endif
      42                 :            : 
      43                 :            : #if defined CONFIG_COMPACTION || defined CONFIG_CMA
      44                 :            : 
      45                 :            : #define CREATE_TRACE_POINTS
      46                 :            : #include <trace/events/compaction.h>
      47                 :            : 
      48                 :            : #define block_start_pfn(pfn, order)     round_down(pfn, 1UL << (order))
      49                 :            : #define block_end_pfn(pfn, order)       ALIGN((pfn) + 1, 1UL << (order))
      50                 :            : #define pageblock_start_pfn(pfn)        block_start_pfn(pfn, pageblock_order)
      51                 :            : #define pageblock_end_pfn(pfn)          block_end_pfn(pfn, pageblock_order)
      52                 :            : 
      53                 :          0 : static unsigned long release_freepages(struct list_head *freelist)
      54                 :            : {
      55                 :            :         struct page *page, *next;
      56                 :            :         unsigned long high_pfn = 0;
      57                 :            : 
      58                 :          0 :         list_for_each_entry_safe(page, next, freelist, lru) {
      59                 :          0 :                 unsigned long pfn = page_to_pfn(page);
      60                 :            :                 list_del(&page->lru);
      61                 :          0 :                 __free_page(page);
      62                 :          0 :                 if (pfn > high_pfn)
      63                 :            :                         high_pfn = pfn;
      64                 :            :         }
      65                 :            : 
      66                 :          0 :         return high_pfn;
      67                 :            : }
      68                 :            : 
      69                 :          3 : static void split_map_pages(struct list_head *list)
      70                 :            : {
      71                 :            :         unsigned int i, order, nr_pages;
      72                 :            :         struct page *page, *next;
      73                 :          3 :         LIST_HEAD(tmp_list);
      74                 :            : 
      75                 :          3 :         list_for_each_entry_safe(page, next, list, lru) {
      76                 :            :                 list_del(&page->lru);
      77                 :            : 
      78                 :          3 :                 order = page_private(page);
      79                 :          3 :                 nr_pages = 1 << order;
      80                 :            : 
      81                 :          3 :                 post_alloc_hook(page, order, __GFP_MOVABLE);
      82                 :          3 :                 if (order)
      83                 :          3 :                         split_page(page, order);
      84                 :            : 
      85                 :          3 :                 for (i = 0; i < nr_pages; i++) {
      86                 :          3 :                         list_add(&page->lru, &tmp_list);
      87                 :          3 :                         page++;
      88                 :            :                 }
      89                 :            :         }
      90                 :            : 
      91                 :            :         list_splice(&tmp_list, list);
      92                 :          3 : }
      93                 :            : 
      94                 :            : #ifdef CONFIG_COMPACTION
      95                 :            : 
      96                 :          0 : int PageMovable(struct page *page)
      97                 :            : {
      98                 :            :         struct address_space *mapping;
      99                 :            : 
     100                 :            :         VM_BUG_ON_PAGE(!PageLocked(page), page);
     101                 :          0 :         if (!__PageMovable(page))
     102                 :            :                 return 0;
     103                 :            : 
     104                 :          0 :         mapping = page_mapping(page);
     105                 :          0 :         if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
     106                 :            :                 return 1;
     107                 :            : 
     108                 :          0 :         return 0;
     109                 :            : }
     110                 :            : EXPORT_SYMBOL(PageMovable);
     111                 :            : 
     112                 :          0 : void __SetPageMovable(struct page *page, struct address_space *mapping)
     113                 :            : {
     114                 :            :         VM_BUG_ON_PAGE(!PageLocked(page), page);
     115                 :            :         VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
     116                 :          0 :         page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
     117                 :          0 : }
     118                 :            : EXPORT_SYMBOL(__SetPageMovable);
     119                 :            : 
     120                 :          0 : void __ClearPageMovable(struct page *page)
     121                 :            : {
     122                 :            :         VM_BUG_ON_PAGE(!PageLocked(page), page);
     123                 :            :         VM_BUG_ON_PAGE(!PageMovable(page), page);
     124                 :            :         /*
     125                 :            :          * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
     126                 :            :          * flag so that VM can catch up released page by driver after isolation.
     127                 :            :          * With it, VM migration doesn't try to put it back.
     128                 :            :          */
     129                 :          0 :         page->mapping = (void *)((unsigned long)page->mapping &
     130                 :            :                                 PAGE_MAPPING_MOVABLE);
     131                 :          0 : }
     132                 :            : EXPORT_SYMBOL(__ClearPageMovable);
     133                 :            : 
     134                 :            : /* Do not skip compaction more than 64 times */
     135                 :            : #define COMPACT_MAX_DEFER_SHIFT 6
     136                 :            : 
     137                 :            : /*
     138                 :            :  * Compaction is deferred when compaction fails to result in a page
     139                 :            :  * allocation success. 1 << compact_defer_shift compactions are skipped up
     140                 :            :  * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
     141                 :            :  */
     142                 :          0 : void defer_compaction(struct zone *zone, int order)
     143                 :            : {
     144                 :          0 :         zone->compact_considered = 0;
     145                 :          0 :         zone->compact_defer_shift++;
     146                 :            : 
     147                 :          0 :         if (order < zone->compact_order_failed)
     148                 :          0 :                 zone->compact_order_failed = order;
     149                 :            : 
     150                 :          0 :         if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
     151                 :          0 :                 zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
     152                 :            : 
     153                 :          0 :         trace_mm_compaction_defer_compaction(zone, order);
     154                 :          0 : }
     155                 :            : 
     156                 :            : /* Returns true if compaction should be skipped this time */
     157                 :          0 : bool compaction_deferred(struct zone *zone, int order)
     158                 :            : {
     159                 :          0 :         unsigned long defer_limit = 1UL << zone->compact_defer_shift;
     160                 :            : 
     161                 :          0 :         if (order < zone->compact_order_failed)
     162                 :            :                 return false;
     163                 :            : 
     164                 :            :         /* Avoid possible overflow */
     165                 :          0 :         if (++zone->compact_considered > defer_limit)
     166                 :          0 :                 zone->compact_considered = defer_limit;
     167                 :            : 
     168                 :          0 :         if (zone->compact_considered >= defer_limit)
     169                 :            :                 return false;
     170                 :            : 
     171                 :          0 :         trace_mm_compaction_deferred(zone, order);
     172                 :            : 
     173                 :          0 :         return true;
     174                 :            : }
     175                 :            : 
     176                 :            : /*
     177                 :            :  * Update defer tracking counters after successful compaction of given order,
     178                 :            :  * which means an allocation either succeeded (alloc_success == true) or is
     179                 :            :  * expected to succeed.
     180                 :            :  */
     181                 :          0 : void compaction_defer_reset(struct zone *zone, int order,
     182                 :            :                 bool alloc_success)
     183                 :            : {
     184                 :          0 :         if (alloc_success) {
     185                 :          0 :                 zone->compact_considered = 0;
     186                 :          0 :                 zone->compact_defer_shift = 0;
     187                 :            :         }
     188                 :          0 :         if (order >= zone->compact_order_failed)
     189                 :          0 :                 zone->compact_order_failed = order + 1;
     190                 :            : 
     191                 :          0 :         trace_mm_compaction_defer_reset(zone, order);
     192                 :          0 : }
     193                 :            : 
     194                 :            : /* Returns true if restarting compaction after many failures */
     195                 :          0 : bool compaction_restarting(struct zone *zone, int order)
     196                 :            : {
     197                 :          0 :         if (order < zone->compact_order_failed)
     198                 :            :                 return false;
     199                 :            : 
     200                 :          0 :         return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
     201                 :          0 :                 zone->compact_considered >= 1UL << zone->compact_defer_shift;
     202                 :            : }
     203                 :            : 
     204                 :            : /* Returns true if the pageblock should be scanned for pages to isolate. */
     205                 :          0 : static inline bool isolation_suitable(struct compact_control *cc,
     206                 :            :                                         struct page *page)
     207                 :            : {
     208                 :          0 :         if (cc->ignore_skip_hint)
     209                 :            :                 return true;
     210                 :            : 
     211                 :          0 :         return !get_pageblock_skip(page);
     212                 :            : }
     213                 :            : 
     214                 :            : static void reset_cached_positions(struct zone *zone)
     215                 :            : {
     216                 :          0 :         zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
     217                 :          0 :         zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
     218                 :          0 :         zone->compact_cached_free_pfn =
     219                 :          0 :                                 pageblock_start_pfn(zone_end_pfn(zone) - 1);
     220                 :            : }
     221                 :            : 
     222                 :            : /*
     223                 :            :  * Compound pages of >= pageblock_order should consistently be skipped until
     224                 :            :  * released. It is always pointless to compact pages of such order (if they are
     225                 :            :  * migratable), and the pageblocks they occupy cannot contain any free pages.
     226                 :            :  */
     227                 :          0 : static bool pageblock_skip_persistent(struct page *page)
     228                 :            : {
     229                 :          0 :         if (!PageCompound(page))
     230                 :            :                 return false;
     231                 :            : 
     232                 :            :         page = compound_head(page);
     233                 :            : 
     234                 :          0 :         if (compound_order(page) >= pageblock_order)
     235                 :            :                 return true;
     236                 :            : 
     237                 :          0 :         return false;
     238                 :            : }
     239                 :            : 
     240                 :            : static bool
     241                 :          0 : __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
     242                 :            :                                                         bool check_target)
     243                 :            : {
     244                 :          0 :         struct page *page = pfn_to_online_page(pfn);
     245                 :            :         struct page *block_page;
     246                 :            :         struct page *end_page;
     247                 :            :         unsigned long block_pfn;
     248                 :            : 
     249                 :          0 :         if (!page)
     250                 :            :                 return false;
     251                 :          0 :         if (zone != page_zone(page))
     252                 :            :                 return false;
     253                 :          0 :         if (pageblock_skip_persistent(page))
     254                 :            :                 return false;
     255                 :            : 
     256                 :            :         /*
     257                 :            :          * If skip is already cleared do no further checking once the
     258                 :            :          * restart points have been set.
     259                 :            :          */
     260                 :          0 :         if (check_source && check_target && !get_pageblock_skip(page))
     261                 :            :                 return true;
     262                 :            : 
     263                 :            :         /*
     264                 :            :          * If clearing skip for the target scanner, do not select a
     265                 :            :          * non-movable pageblock as the starting point.
     266                 :            :          */
     267                 :          0 :         if (!check_source && check_target &&
     268                 :          0 :             get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
     269                 :            :                 return false;
     270                 :            : 
     271                 :            :         /* Ensure the start of the pageblock or zone is online and valid */
     272                 :          0 :         block_pfn = pageblock_start_pfn(pfn);
     273                 :          0 :         block_pfn = max(block_pfn, zone->zone_start_pfn);
     274                 :          0 :         block_page = pfn_to_online_page(block_pfn);
     275                 :          0 :         if (block_page) {
     276                 :            :                 page = block_page;
     277                 :            :                 pfn = block_pfn;
     278                 :            :         }
     279                 :            : 
     280                 :            :         /* Ensure the end of the pageblock or zone is online and valid */
     281                 :          0 :         block_pfn = pageblock_end_pfn(pfn) - 1;
     282                 :          0 :         block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
     283                 :          0 :         end_page = pfn_to_online_page(block_pfn);
     284                 :          0 :         if (!end_page)
     285                 :            :                 return false;
     286                 :            : 
     287                 :            :         /*
     288                 :            :          * Only clear the hint if a sample indicates there is either a
     289                 :            :          * free page or an LRU page in the block. One or other condition
     290                 :            :          * is necessary for the block to be a migration source/target.
     291                 :            :          */
     292                 :            :         do {
     293                 :            :                 if (pfn_valid_within(pfn)) {
     294                 :          0 :                         if (check_source && PageLRU(page)) {
     295                 :          0 :                                 clear_pageblock_skip(page);
     296                 :          0 :                                 return true;
     297                 :            :                         }
     298                 :            : 
     299                 :          0 :                         if (check_target && PageBuddy(page)) {
     300                 :          0 :                                 clear_pageblock_skip(page);
     301                 :          0 :                                 return true;
     302                 :            :                         }
     303                 :            :                 }
     304                 :            : 
     305                 :          0 :                 page += (1 << PAGE_ALLOC_COSTLY_ORDER);
     306                 :            :                 pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
     307                 :          0 :         } while (page <= end_page);
     308                 :            : 
     309                 :            :         return false;
     310                 :            : }
     311                 :            : 
     312                 :            : /*
     313                 :            :  * This function is called to clear all cached information on pageblocks that
     314                 :            :  * should be skipped for page isolation when the migrate and free page scanner
     315                 :            :  * meet.
     316                 :            :  */
     317                 :          0 : static void __reset_isolation_suitable(struct zone *zone)
     318                 :            : {
     319                 :          0 :         unsigned long migrate_pfn = zone->zone_start_pfn;
     320                 :          0 :         unsigned long free_pfn = zone_end_pfn(zone) - 1;
     321                 :            :         unsigned long reset_migrate = free_pfn;
     322                 :            :         unsigned long reset_free = migrate_pfn;
     323                 :            :         bool source_set = false;
     324                 :            :         bool free_set = false;
     325                 :            : 
     326                 :          0 :         if (!zone->compact_blockskip_flush)
     327                 :          0 :                 return;
     328                 :            : 
     329                 :          0 :         zone->compact_blockskip_flush = false;
     330                 :            : 
     331                 :            :         /*
     332                 :            :          * Walk the zone and update pageblock skip information. Source looks
     333                 :            :          * for PageLRU while target looks for PageBuddy. When the scanner
     334                 :            :          * is found, both PageBuddy and PageLRU are checked as the pageblock
     335                 :            :          * is suitable as both source and target.
     336                 :            :          */
     337                 :          0 :         for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
     338                 :          0 :                                         free_pfn -= pageblock_nr_pages) {
     339                 :          0 :                 cond_resched();
     340                 :            : 
     341                 :            :                 /* Update the migrate PFN */
     342                 :          0 :                 if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
     343                 :            :                     migrate_pfn < reset_migrate) {
     344                 :            :                         source_set = true;
     345                 :            :                         reset_migrate = migrate_pfn;
     346                 :          0 :                         zone->compact_init_migrate_pfn = reset_migrate;
     347                 :          0 :                         zone->compact_cached_migrate_pfn[0] = reset_migrate;
     348                 :          0 :                         zone->compact_cached_migrate_pfn[1] = reset_migrate;
     349                 :            :                 }
     350                 :            : 
     351                 :            :                 /* Update the free PFN */
     352                 :          0 :                 if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
     353                 :            :                     free_pfn > reset_free) {
     354                 :            :                         free_set = true;
     355                 :            :                         reset_free = free_pfn;
     356                 :          0 :                         zone->compact_init_free_pfn = reset_free;
     357                 :          0 :                         zone->compact_cached_free_pfn = reset_free;
     358                 :            :                 }
     359                 :            :         }
     360                 :            : 
     361                 :            :         /* Leave no distance if no suitable block was reset */
     362                 :          0 :         if (reset_migrate >= reset_free) {
     363                 :          0 :                 zone->compact_cached_migrate_pfn[0] = migrate_pfn;
     364                 :          0 :                 zone->compact_cached_migrate_pfn[1] = migrate_pfn;
     365                 :          0 :                 zone->compact_cached_free_pfn = free_pfn;
     366                 :            :         }
     367                 :            : }
     368                 :            : 
     369                 :          3 : void reset_isolation_suitable(pg_data_t *pgdat)
     370                 :            : {
     371                 :            :         int zoneid;
     372                 :            : 
     373                 :          3 :         for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
     374                 :          3 :                 struct zone *zone = &pgdat->node_zones[zoneid];
     375                 :          3 :                 if (!populated_zone(zone))
     376                 :          3 :                         continue;
     377                 :            : 
     378                 :            :                 /* Only flush if a full compaction finished recently */
     379                 :          3 :                 if (zone->compact_blockskip_flush)
     380                 :          0 :                         __reset_isolation_suitable(zone);
     381                 :            :         }
     382                 :          3 : }
     383                 :            : 
     384                 :            : /*
     385                 :            :  * Sets the pageblock skip bit if it was clear. Note that this is a hint as
     386                 :            :  * locks are not required for read/writers. Returns true if it was already set.
     387                 :            :  */
     388                 :          0 : static bool test_and_set_skip(struct compact_control *cc, struct page *page,
     389                 :            :                                                         unsigned long pfn)
     390                 :            : {
     391                 :            :         bool skip;
     392                 :            : 
     393                 :            :         /* Do not update if skip hint is being ignored */
     394                 :          0 :         if (cc->ignore_skip_hint)
     395                 :            :                 return false;
     396                 :            : 
     397                 :          0 :         if (!IS_ALIGNED(pfn, pageblock_nr_pages))
     398                 :            :                 return false;
     399                 :            : 
     400                 :          0 :         skip = get_pageblock_skip(page);
     401                 :          0 :         if (!skip && !cc->no_set_skip_hint)
     402                 :          0 :                 set_pageblock_skip(page);
     403                 :            : 
     404                 :          0 :         return skip;
     405                 :            : }
     406                 :            : 
     407                 :          3 : static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
     408                 :            : {
     409                 :          3 :         struct zone *zone = cc->zone;
     410                 :            : 
     411                 :          3 :         pfn = pageblock_end_pfn(pfn);
     412                 :            : 
     413                 :            :         /* Set for isolation rather than compaction */
     414                 :          3 :         if (cc->no_set_skip_hint)
     415                 :          3 :                 return;
     416                 :            : 
     417                 :          0 :         if (pfn > zone->compact_cached_migrate_pfn[0])
     418                 :          0 :                 zone->compact_cached_migrate_pfn[0] = pfn;
     419                 :          0 :         if (cc->mode != MIGRATE_ASYNC &&
     420                 :          0 :             pfn > zone->compact_cached_migrate_pfn[1])
     421                 :          0 :                 zone->compact_cached_migrate_pfn[1] = pfn;
     422                 :            : }
     423                 :            : 
     424                 :            : /*
     425                 :            :  * If no pages were isolated then mark @page's pageblock to be skipped in the
     426                 :            :  * future. The information is later cleared by __reset_isolation_suitable().
     427                 :            :  */
     428                 :          0 : static void update_pageblock_skip(struct compact_control *cc,
     429                 :            :                         struct page *page, unsigned long pfn)
     430                 :            : {
     431                 :          0 :         struct zone *zone = cc->zone;
     432                 :            : 
     433                 :          0 :         if (cc->no_set_skip_hint)
     434                 :            :                 return; /* set for isolation rather than compaction */
     435                 :            : 
     436                 :          0 :         if (!page)
     437                 :            :                 return; /* no page to mark */
     438                 :            : 
     439                 :          0 :         set_pageblock_skip(page);
     440                 :            : 
     441                 :            :         /* Only ever lower the zone's cached free pfn, down to this @pfn */
     442                 :          0 :         if (pfn < zone->compact_cached_free_pfn)
     443                 :          0 :                 zone->compact_cached_free_pfn = pfn;
     444                 :            : }
     445                 :            : #else /* !CONFIG_COMPACTION: stubs for the helpers above */
     446                 :            : static inline bool isolation_suitable(struct compact_control *cc,
     447                 :            :                                         struct page *page)
     448                 :            : {
     449                 :            :         return true; /* stub: every page is reported as suitable */
     450                 :            : }
     451                 :            : 
     452                 :            : static inline bool pageblock_skip_persistent(struct page *page)
     453                 :            : {
     454                 :            :         return false; /* stub: always false */
     455                 :            : }
     456                 :            : 
     457                 :            : static inline void update_pageblock_skip(struct compact_control *cc,
     458                 :            :                         struct page *page, unsigned long pfn)
     459                 :            : { /* no-op stub */
     460                 :            : }
     461                 :            : 
     462                 :            : static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
     463                 :            : { /* no-op stub */
     464                 :            : }
     465                 :            : 
     466                 :            : static bool test_and_set_skip(struct compact_control *cc, struct page *page,
     467                 :            :                                                         unsigned long pfn)
     468                 :            : {
     469                 :            :         return false; /* stub: always false */
     470                 :            : }
     471                 :            : #endif /* CONFIG_COMPACTION */
     472                 :            : 
     473                 :            : /*
     474                 :            :  * Compaction requires the taking of some coarse locks that are potentially
     475                 :            :  * very heavily contended. For async compaction, trylock and record if the
     476                 :            :  * lock is contended. The lock will still be acquired but compaction will
     477                 :            :  * abort when the current block is finished regardless of success rate.
     478                 :            :  * Sync compaction acquires the lock.
     479                 :            :  *
     480                 :            :  * Always returns true which makes it easier to track lock state in callers.
     481                 :            :  */
     482                 :          3 : static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
     483                 :            :                                                 struct compact_control *cc)
     484                 :            : {
     485                 :            :         /* Track if the lock is contended in async mode */
     486                 :          3 :         if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
     487                 :          0 :                 if (spin_trylock_irqsave(lock, *flags))
     488                 :            :                         return true; /* trylock succeeded: lock is held */
     489                 :            : 
     490                 :          0 :                 cc->contended = true; /* trylock failed; later calls skip the trylock */
     491                 :            :         }
     492                 :            : 
     493                 :          3 :         spin_lock_irqsave(lock, *flags); /* not async, already contended, or trylock failed */
     494                 :          3 :         return true;
     495                 :            : }
     496                 :            : 
     497                 :            : /*
     498                 :            :  * Compaction requires the taking of some coarse locks that are potentially
     499                 :            :  * very heavily contended. The lock should be periodically unlocked to avoid
     500                 :            :  * having disabled IRQs for a long time, even when there is nobody waiting on
     501                 :            :  * the lock. It might also be that allowing the IRQs will result in
     502                 :            :  * need_resched() becoming true, so cond_resched() is called here for both
     503                 :            :  * async and sync compaction; neither aborts merely because of need_resched().
     504                 :            :  * Compaction aborts, with cc->contended set, if a fatal signal is pending.
     505                 :            :  * In either case if the lock was locked, it is dropped and not regained.
     506                 :            :  *
     507                 :            :  * Returns true if compaction should abort due to fatal signal pending
     508                 :            :  *              (cc->contended has been set)
     509                 :            :  * Returns false when compaction can continue (the task might have
     510                 :            :  *              scheduled)
     511                 :            :  */
     512                 :          3 : static bool compact_unlock_should_abort(spinlock_t *lock,
     513                 :            :                 unsigned long flags, bool *locked, struct compact_control *cc)
     514                 :            : {
     515                 :          3 :         if (*locked) {
     516                 :            :                 spin_unlock_irqrestore(lock, flags);
     517                 :          1 :                 *locked = false; /* tell the caller the lock is no longer held */
     518                 :            :         }
     519                 :            : 
     520                 :          3 :         if (fatal_signal_pending(current)) {
     521                 :          0 :                 cc->contended = true;
     522                 :          0 :                 return true;
     523                 :            :         }
     524                 :            : 
     525                 :          3 :         cond_resched();
     526                 :            : 
     527                 :          3 :         return false;
     528                 :            : }
     529                 :            : 
     530                 :            : /*
     531                 :            :  * Isolate free pages in [*start_pfn, end_pfn) onto @freelist and return how many
     532                 :            :  * were isolated. If @strict is true, will abort returning 0 on any invalid PFNs
     533                 :            :  * or non-free pages inside of the pageblock (though some may still be isolated).
     534                 :            :  */
     535                 :          3 : static unsigned long isolate_freepages_block(struct compact_control *cc,
     536                 :            :                                 unsigned long *start_pfn,
     537                 :            :                                 unsigned long end_pfn,
     538                 :            :                                 struct list_head *freelist,
     539                 :            :                                 unsigned int stride,
     540                 :            :                                 bool strict)
     541                 :            : {
     542                 :            :         int nr_scanned = 0, total_isolated = 0;
     543                 :            :         struct page *cursor;
     544                 :          3 :         unsigned long flags = 0;
     545                 :          3 :         bool locked = false;
     546                 :          3 :         unsigned long blockpfn = *start_pfn;
     547                 :            :         unsigned int order;
     548                 :            : 
     549                 :            :         /* Strict mode is for isolation, speed is secondary */
     550                 :          3 :         if (strict)
     551                 :            :                 stride = 1;
     552                 :            : 
     553                 :          3 :         cursor = pfn_to_page(blockpfn);
     554                 :            : 
     555                 :            :         /* Isolate free pages, stepping @stride PFNs per iteration. */
     556                 :          3 :         for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
     557                 :            :                 int isolated;
     558                 :            :                 struct page *page = cursor;
     559                 :            : 
     560                 :            :                 /*
     561                 :            :                  * Periodically drop the lock (if held) regardless of its
     562                 :            :                  * contention, to give chance to IRQs. Abort if a fatal signal
     563                 :            :                  * is pending.
     564                 :            :                  */
     565                 :          3 :                 if (!(blockpfn % SWAP_CLUSTER_MAX)
     566                 :          3 :                     && compact_unlock_should_abort(&cc->zone->lock, flags,
     567                 :            :                                                                 &locked, cc))
     568                 :            :                         break;
     569                 :            : 
     570                 :          3 :                 nr_scanned++;
     571                 :            :                 if (!pfn_valid_within(blockpfn))
     572                 :            :                         goto isolate_fail;
     573                 :            : 
     574                 :            :                 /*
     575                 :            :                  * For compound pages such as THP and hugetlbfs, we can save
     576                 :            :                  * potentially a lot of iterations if we skip them at once.
     577                 :            :                  * The check is racy, but we can consider only valid values
     578                 :            :                  * and the only danger is skipping too much.
     579                 :            :                  */
     580                 :          3 :                 if (PageCompound(page)) {
     581                 :            :                         const unsigned int order = compound_order(page); /* shadows the outer 'order' */
     582                 :            : 
     583                 :          0 :                         if (likely(order < MAX_ORDER)) {
     584                 :          0 :                                 blockpfn += (1UL << order) - 1;
     585                 :          0 :                                 cursor += (1UL << order) - 1;
     586                 :            :                         }
     587                 :            :                         goto isolate_fail;
     588                 :            :                 }
     589                 :            : 
     590                 :          3 :                 if (!PageBuddy(page))
     591                 :            :                         goto isolate_fail;
     592                 :            : 
     593                 :            :                 /*
     594                 :            :                  * If we already hold the lock, the PageBuddy() check above
     595                 :            :                  * was made under it, so no recheck is needed. Otherwise the
     596                 :            :                  * check above was made without the lock: take the lock now
     597                 :            :                  * (compact_lock_irqsave() always returns true) and repeat
     598                 :            :                  * the check before touching the page.
     599                 :            :                  */
     600                 :          3 :                 if (!locked) {
     601                 :          3 :                         locked = compact_lock_irqsave(&cc->zone->lock,
     602                 :            :                                                                 &flags, cc);
     603                 :            : 
     604                 :            :                         /* Recheck this is a buddy page under lock */
     605                 :          3 :                         if (!PageBuddy(page))
     606                 :            :                                 goto isolate_fail;
     607                 :            :                 }
     608                 :            : 
     609                 :            :                 /* Found a free page, will break it into order-0 pages */
     610                 :            :                 order = page_order(page);
     611                 :          3 :                 isolated = __isolate_free_page(page, order);
     612                 :          3 :                 if (!isolated)
     613                 :            :                         break;
     614                 :          3 :                 set_page_private(page, order); /* record the order in the page's private field */
     615                 :            : 
     616                 :          3 :                 total_isolated += isolated;
     617                 :          3 :                 cc->nr_freepages += isolated;
     618                 :          3 :                 list_add_tail(&page->lru, freelist);
     619                 :            : 
     620                 :          3 :                 if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
     621                 :          0 :                         blockpfn += isolated; /* free pages now cover nr_migratepages: step past and stop */
     622                 :          0 :                         break;
     623                 :            :                 }
     624                 :            :                 /* Advance to the end of split page */
     625                 :          3 :                 blockpfn += isolated - 1;
     626                 :          3 :                 cursor += isolated - 1;
     627                 :          3 :                 continue;
     628                 :            : 
     629                 :            : isolate_fail:
     630                 :          0 :                 if (strict)
     631                 :            :                         break;
     632                 :            :                 else
     633                 :          0 :                         continue;
     634                 :            : 
     635                 :            :         }
     636                 :            : 
     637                 :          3 :         if (locked)
     638                 :          3 :                 spin_unlock_irqrestore(&cc->zone->lock, flags);
     639                 :            : 
     640                 :            :         /*
     641                 :            :          * There is a tiny chance that we have read bogus compound_order(),
     642                 :            :          * so be careful to not go outside of the pageblock.
     643                 :            :          */
     644                 :          3 :         if (unlikely(blockpfn > end_pfn))
     645                 :            :                 blockpfn = end_pfn;
     646                 :            : 
     647                 :          3 :         trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
     648                 :            :                                         nr_scanned, total_isolated);
     649                 :            : 
     650                 :            :         /* Record how far we have got within the block */
     651                 :          3 :         *start_pfn = blockpfn;
     652                 :            : 
     653                 :            :         /*
     654                 :            :          * If strict isolation is requested by CMA then check that all the
     655                 :            :          * pages requested were isolated. If there were any failures, 0 is
     656                 :            :          * returned and CMA will fail.
     657                 :            :          */
     658                 :          3 :         if (strict && blockpfn < end_pfn)
     659                 :            :                 total_isolated = 0;
     660                 :            : 
     661                 :          3 :         cc->total_free_scanned += nr_scanned;
     662                 :          3 :         if (total_isolated)
     663                 :            :                 count_compact_events(COMPACTISOLATED, total_isolated);
     664                 :          3 :         return total_isolated;
     665                 :            : }
     666                 :            : 
     667                 :            : /**
     668                 :            :  * isolate_freepages_range() - isolate free pages.
     669                 :            :  * @cc:        Compaction control structure.
     670                 :            :  * @start_pfn: The first PFN to start isolating.
     671                 :            :  * @end_pfn:   The one-past-last PFN.
     672                 :            :  *
     673                 :            :  * Non-free pages, invalid PFNs, or zone boundaries within the
     674                 :            :  * [start_pfn, end_pfn) range are considered errors and cause the function
     675                 :            :  * to undo its actions and return zero.
     676                 :            :  *
     677                 :            :  * Otherwise, function returns one-past-the-last PFN of isolated page
     678                 :            :  * (which may be greater than end_pfn if end fell in the middle of
     679                 :            :  * a free page).
     680                 :            :  */
     681                 :            : unsigned long
     682                 :          3 : isolate_freepages_range(struct compact_control *cc,
     683                 :            :                         unsigned long start_pfn, unsigned long end_pfn)
     684                 :            : {
     685                 :            :         unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
     686                 :          3 :         LIST_HEAD(freelist);
     687                 :            : 
     688                 :            :         pfn = start_pfn;
     689                 :          3 :         block_start_pfn = pageblock_start_pfn(pfn);
     690                 :          3 :         if (block_start_pfn < cc->zone->zone_start_pfn)
     691                 :            :                 block_start_pfn = cc->zone->zone_start_pfn;
     692                 :          3 :         block_end_pfn = pageblock_end_pfn(pfn);
     693                 :            : 
     694                 :          3 :         for (; pfn < end_pfn; pfn += isolated,
     695                 :            :                                 block_start_pfn = block_end_pfn,
     696                 :          3 :                                 block_end_pfn += pageblock_nr_pages) {
     697                 :            :                 /* Protect pfn from changing by isolate_freepages_block */
     698                 :          3 :                 unsigned long isolate_start_pfn = pfn;
     699                 :            : 
     700                 :          3 :                 block_end_pfn = min(block_end_pfn, end_pfn);
     701                 :            : 
     702                 :            :                 /*
     703                 :            :                  * pfn could pass the block_end_pfn if isolated freepage
     704                 :            :                  * is more than pageblock order. In this case, we adjust
     705                 :            :                  * scanning range to right one.
     706                 :            :                  */
     707                 :          3 :                 if (pfn >= block_end_pfn) {
     708                 :          0 :                         block_start_pfn = pageblock_start_pfn(pfn);
     709                 :          0 :                         block_end_pfn = pageblock_end_pfn(pfn);
     710                 :          0 :                         block_end_pfn = min(block_end_pfn, end_pfn);
     711                 :            :                 }
     712                 :            : 
     713                 :          3 :                 if (!pageblock_pfn_to_page(block_start_pfn,
     714                 :            :                                         block_end_pfn, cc->zone))
     715                 :            :                         break;
     716                 :            : 
     717                 :          3 :                 isolated = isolate_freepages_block(cc, &isolate_start_pfn,
     718                 :            :                                         block_end_pfn, &freelist, 0, true); /* stride 0 is overridden to 1 by strict mode */
     719                 :            : 
     720                 :            :                 /*
     721                 :            :                  * In strict mode, isolate_freepages_block() returns 0 if
     722                 :            :                  * there are any holes in the block (ie. invalid PFNs or
     723                 :            :                  * non-free pages).
     724                 :            :                  */
     725                 :          3 :                 if (!isolated)
     726                 :            :                         break;
     727                 :            : 
     728                 :            :                 /*
     729                 :            :                  * If we managed to isolate pages, it is always (1 << n) *
     730                 :            :                  * pageblock_nr_pages for some non-negative n.  (Max order
     731                 :            :                  * page may span two pageblocks).
     732                 :            :                  */
     733                 :            :         }
     734                 :            : 
     735                 :            :         /* __isolate_free_page() does not map the pages */
     736                 :          3 :         split_map_pages(&freelist);
     737                 :            : 
     738                 :          3 :         if (pfn < end_pfn) {
     739                 :            :                 /* Loop terminated early, cleanup. */
     740                 :          0 :                 release_freepages(&freelist);
     741                 :          0 :                 return 0;
     742                 :            :         }
     743                 :            : 
     744                 :            :         /* We don't use freelists for anything. */
     745                 :          3 :         return pfn;
     746                 :            : }
     747                 :            : 
     748                 :            : /* Similar to reclaim, but different enough that they don't share logic */
     749                 :          3 : static bool too_many_isolated(pg_data_t *pgdat)
     750                 :            : {
     751                 :            :         unsigned long active, inactive, isolated;
     752                 :            : 
     753                 :          3 :         inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
     754                 :            :                         node_page_state(pgdat, NR_INACTIVE_ANON);
     755                 :          3 :         active = node_page_state(pgdat, NR_ACTIVE_FILE) +
     756                 :            :                         node_page_state(pgdat, NR_ACTIVE_ANON);
     757                 :          3 :         isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
     758                 :            :                         node_page_state(pgdat, NR_ISOLATED_ANON);
     759                 :            : 
     760                 :          3 :         return isolated > (inactive + active) / 2; /* isolated exceeds half of the node's inactive + active count */
     761                 :            : }
     762                 :            : 
     763                 :            : /**
     764                 :            :  * isolate_migratepages_block() - isolate all migrate-able pages within
     765                 :            :  *                                a single pageblock
     766                 :            :  * @cc:         Compaction control structure.
     767                 :            :  * @low_pfn:    The first PFN to isolate
     768                 :            :  * @end_pfn:    The one-past-the-last PFN to isolate, within same pageblock
     769                 :            :  * @isolate_mode: Isolation mode to be used.
     770                 :            :  *
     771                 :            :  * Isolate all pages that can be migrated from the range specified by
     772                 :            :  * [low_pfn, end_pfn). The range is expected to be within same pageblock.
     773                 :            :  * Returns zero if too many pages are already isolated (async compaction) or a
     774                 :            :  * fatal signal is pending, otherwise PFN of the first page that was not scanned
     775                 :            :  * (which may be less than, equal to, or more than end_pfn).
     776                 :            :  *
     777                 :            :  * The pages are isolated on cc->migratepages list (not required to be empty),
     778                 :            :  * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
     779                 :            :  * is neither read nor updated.
     780                 :            :  */
     781                 :            : static unsigned long
     782                 :          3 : isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
     783                 :            :                         unsigned long end_pfn, isolate_mode_t isolate_mode)
     784                 :            : {
     785                 :          3 :         pg_data_t *pgdat = cc->zone->zone_pgdat;
     786                 :            :         unsigned long nr_scanned = 0, nr_isolated = 0;
     787                 :            :         struct lruvec *lruvec;
     788                 :          3 :         unsigned long flags = 0;
     789                 :          3 :         bool locked = false;
     790                 :            :         struct page *page = NULL, *valid_page = NULL;
     791                 :            :         unsigned long start_pfn = low_pfn;
     792                 :            :         bool skip_on_failure = false;
     793                 :            :         unsigned long next_skip_pfn = 0;
     794                 :            :         bool skip_updated = false;
     795                 :            : 
     796                 :            :         /*
     797                 :            :          * Ensure that there are not too many pages isolated from the LRU
     798                 :            :          * list by either parallel reclaimers or compaction. If there are,
     799                 :            :          * delay for some time until fewer pages are isolated
     800                 :            :          */
     801                 :          3 :         while (unlikely(too_many_isolated(pgdat))) {
     802                 :            :                 /* async migration should just abort */
     803                 :          0 :                 if (cc->mode == MIGRATE_ASYNC)
     804                 :            :                         return 0;
     805                 :            : 
     806                 :          0 :                 congestion_wait(BLK_RW_ASYNC, HZ/10);
     807                 :            : 
     808                 :          0 :                 if (fatal_signal_pending(current))
     809                 :            :                         return 0;
     810                 :            :         }
     811                 :            : 
     812                 :          3 :         cond_resched();
     813                 :            : 
     814                 :          3 :         if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
     815                 :            :                 skip_on_failure = true;
     816                 :          0 :                 next_skip_pfn = block_end_pfn(low_pfn, cc->order);
     817                 :            :         }
     818                 :            : 
     819                 :            :         /* Time to isolate some pages for migration */
     820                 :          3 :         for (; low_pfn < end_pfn; low_pfn++) {
     821                 :            : 
     822                 :          3 :                 if (skip_on_failure && low_pfn >= next_skip_pfn) {
     823                 :            :                         /*
     824                 :            :                          * We have isolated all migration candidates in the
     825                 :            :                          * previous order-aligned block, and did not skip it due
     826                 :            :                          * to failure. We should migrate the pages now and
     827                 :            :                          * hopefully succeed compaction.
     828                 :            :                          */
     829                 :          0 :                         if (nr_isolated)
     830                 :            :                                 break;
     831                 :            : 
     832                 :            :                         /*
     833                 :            :                          * We failed to isolate in the previous order-aligned
     834                 :            :                          * block. Set the new boundary to the end of the
     835                 :            :                          * current block. Note we can't simply increase
     836                 :            :                          * next_skip_pfn by 1 << order, as low_pfn might have
     837                 :            :                          * been incremented by a higher number due to skipping
     838                 :            :                          * a compound or a high-order buddy page in the
     839                 :            :                          * previous loop iteration.
     840                 :            :                          */
     841                 :          0 :                         next_skip_pfn = block_end_pfn(low_pfn, cc->order);
     842                 :            :                 }
     843                 :            : 
     844                 :            :                 /*
     845                 :            :                  * Periodically drop the lock (if held) regardless of its
     846                 :            :                  * contention, to give chance to IRQs. Abort completely if
     847                 :            :                  * a fatal signal is pending.
     848                 :            :                  */
     849                 :          3 :                 if (!(low_pfn % SWAP_CLUSTER_MAX)
     850                 :          3 :                     && compact_unlock_should_abort(&pgdat->lru_lock,
     851                 :            :                                             flags, &locked, cc)) {
     852                 :            :                         low_pfn = 0;
     853                 :            :                         goto fatal_pending;
     854                 :            :                 }
     855                 :            : 
     856                 :            :                 if (!pfn_valid_within(low_pfn))
     857                 :            :                         goto isolate_fail;
     858                 :          3 :                 nr_scanned++;
     859                 :            : 
     860                 :          3 :                 page = pfn_to_page(low_pfn);
     861                 :            : 
     862                 :            :                 /*
     863                 :            :                  * Check if the pageblock has already been marked skipped.
     864                 :            :                  * Only the aligned PFN is checked as the caller isolates
     865                 :            :                  * COMPACT_CLUSTER_MAX at a time so the second call must
     866                 :            :                  * not falsely conclude that the block should be skipped.
     867                 :            :                  */
     868                 :          3 :                 if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
     869                 :          3 :                         if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
     870                 :            :                                 low_pfn = end_pfn;
     871                 :            :                                 goto isolate_abort;
     872                 :            :                         }
     873                 :            :                         valid_page = page;
     874                 :            :                 }
     875                 :            : 
     876                 :            :                 /*
     877                 :            :                  * Skip if free. We read page order here without zone lock
     878                 :            :                  * which is generally unsafe, but the race window is small and
     879                 :            :                  * the worst thing that can happen is that we skip some
     880                 :            :                  * potential isolation targets.
     881                 :            :                  */
     882                 :          3 :                 if (PageBuddy(page)) {
     883                 :            :                         unsigned long freepage_order = page_order_unsafe(page);
     884                 :            : 
     885                 :            :                         /*
     886                 :            :                          * Without lock, we cannot be sure that what we got is
     887                 :            :                          * a valid page order. Consider only values in the
     888                 :            :                          * valid order range to prevent low_pfn overflow.
     889                 :            :                          */
     890                 :          3 :                         if (freepage_order > 0 && freepage_order < MAX_ORDER)
     891                 :          3 :                                 low_pfn += (1UL << freepage_order) - 1;
     892                 :          3 :                         continue;
     893                 :            :                 }
     894                 :            : 
     895                 :            :                 /*
     896                 :            :                  * Regardless of being on LRU, compound pages such as THP and
     897                 :            :                  * hugetlbfs are not to be compacted. We can potentially save
     898                 :            :                  * a lot of iterations if we skip them at once. The check is
     899                 :            :                  * racy, but we can consider only valid values and the only
     900                 :            :                  * danger is skipping too much.
     901                 :            :                  */
     902                 :          0 :                 if (PageCompound(page)) {
     903                 :            :                         const unsigned int order = compound_order(page);
     904                 :            : 
     905                 :          0 :                         if (likely(order < MAX_ORDER))
     906                 :          0 :                                 low_pfn += (1UL << order) - 1;
     907                 :            :                         goto isolate_fail;
     908                 :            :                 }
     909                 :            : 
     910                 :            :                 /*
     911                 :            :                  * Check may be lockless but that's ok as we recheck later.
     912                 :            :                  * It's possible to migrate LRU and non-lru movable pages.
     913                 :            :                  * Skip any other type of page
     914                 :            :                  */
     915                 :          0 :                 if (!PageLRU(page)) {
     916                 :            :                         /*
     917                 :            :                          * __PageMovable can return false positive so we need
     918                 :            :                          * to verify it under page_lock.
     919                 :            :                          */
     920                 :          0 :                         if (unlikely(__PageMovable(page)) &&
     921                 :            :                                         !PageIsolated(page)) {
     922                 :          0 :                                 if (locked) {
     923                 :          0 :                                         spin_unlock_irqrestore(&pgdat->lru_lock,
     924                 :            :                                                                         flags);
     925                 :          0 :                                         locked = false;
     926                 :            :                                 }
     927                 :            : 
     928                 :          0 :                                 if (!isolate_movable_page(page, isolate_mode))
     929                 :            :                                         goto isolate_success;
     930                 :            :                         }
     931                 :            : 
     932                 :            :                         goto isolate_fail;
     933                 :            :                 }
     934                 :            : 
     935                 :            :                 /*
     936                 :            :                  * Migration will fail if an anonymous page is pinned in memory,
     937                 :            :                  * so avoid taking lru_lock and isolating it unnecessarily in an
     938                 :            :                  * admittedly racy check.
     939                 :            :                  */
     940                 :          0 :                 if (!page_mapping(page) &&
     941                 :          0 :                     page_count(page) > page_mapcount(page))
     942                 :            :                         goto isolate_fail;
     943                 :            : 
     944                 :            :                 /*
     945                 :            :                  * Only allow to migrate anonymous pages in GFP_NOFS context
     946                 :            :                  * because those do not depend on fs locks.
     947                 :            :                  */
     948                 :          0 :                 if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
     949                 :            :                         goto isolate_fail;
     950                 :            : 
     951                 :            :                 /* If we already hold the lock, we can skip some rechecking */
     952                 :          0 :                 if (!locked) {
     953                 :          0 :                         locked = compact_lock_irqsave(&pgdat->lru_lock,
     954                 :            :                                                                 &flags, cc);
     955                 :            : 
     956                 :            :                         /* Try get exclusive access under lock */
     957                 :          0 :                         if (!skip_updated) {
     958                 :            :                                 skip_updated = true;
     959                 :          0 :                                 if (test_and_set_skip(cc, page, low_pfn))
     960                 :            :                                         goto isolate_abort;
     961                 :            :                         }
     962                 :            : 
     963                 :            :                         /* Recheck PageLRU and PageCompound under lock */
     964                 :          0 :                         if (!PageLRU(page))
     965                 :            :                                 goto isolate_fail;
     966                 :            : 
     967                 :            :                         /*
     968                 :            :                          * Page become compound since the non-locked check,
     969                 :            :                          * and it's on LRU. It can only be a THP so the order
     970                 :            :                          * is safe to read and it's 0 for tail pages.
     971                 :            :                          */
     972                 :          0 :                         if (unlikely(PageCompound(page))) {
     973                 :          0 :                                 low_pfn += compound_nr(page) - 1;
     974                 :          0 :                                 goto isolate_fail;
     975                 :            :                         }
     976                 :            :                 }
     977                 :            : 
     978                 :          0 :                 lruvec = mem_cgroup_page_lruvec(page, pgdat);
     979                 :            : 
     980                 :            :                 /* Try isolate the page */
     981                 :          0 :                 if (__isolate_lru_page(page, isolate_mode) != 0)
     982                 :            :                         goto isolate_fail;
     983                 :            : 
     984                 :            :                 VM_BUG_ON_PAGE(PageCompound(page), page);
     985                 :            : 
     986                 :            :                 /* Successfully isolated */
     987                 :            :                 del_page_from_lru_list(page, lruvec, page_lru(page));
     988                 :          0 :                 inc_node_page_state(page,
     989                 :          0 :                                 NR_ISOLATED_ANON + page_is_file_cache(page));
     990                 :            : 
     991                 :            : isolate_success:
     992                 :          0 :                 list_add(&page->lru, &cc->migratepages);
     993                 :          0 :                 cc->nr_migratepages++;
     994                 :          0 :                 nr_isolated++;
     995                 :            : 
     996                 :            :                 /*
     997                 :            :                  * Avoid isolating too much unless this block is being
     998                 :            :                  * rescanned (e.g. dirty/writeback pages, parallel allocation)
     999                 :            :                  * or a lock is contended. For contention, isolate quickly to
    1000                 :            :                  * potentially remove one source of contention.
    1001                 :            :                  */
    1002                 :          0 :                 if (cc->nr_migratepages == COMPACT_CLUSTER_MAX &&
    1003                 :          0 :                     !cc->rescan && !cc->contended) {
    1004                 :          0 :                         ++low_pfn;
    1005                 :          0 :                         break;
    1006                 :            :                 }
    1007                 :            : 
    1008                 :          0 :                 continue;
    1009                 :            : isolate_fail:
    1010                 :          0 :                 if (!skip_on_failure)
    1011                 :          0 :                         continue;
    1012                 :            : 
    1013                 :            :                 /*
    1014                 :            :                  * We have isolated some pages, but then failed. Release them
    1015                 :            :                  * instead of migrating, as we cannot form the cc->order buddy
    1016                 :            :                  * page anyway.
    1017                 :            :                  */
    1018                 :          0 :                 if (nr_isolated) {
    1019                 :          0 :                         if (locked) {
    1020                 :          0 :                                 spin_unlock_irqrestore(&pgdat->lru_lock, flags);
    1021                 :          0 :                                 locked = false;
    1022                 :            :                         }
    1023                 :          0 :                         putback_movable_pages(&cc->migratepages);
    1024                 :          0 :                         cc->nr_migratepages = 0;
    1025                 :            :                         nr_isolated = 0;
    1026                 :            :                 }
    1027                 :            : 
    1028                 :          0 :                 if (low_pfn < next_skip_pfn) {
    1029                 :          0 :                         low_pfn = next_skip_pfn - 1;
    1030                 :            :                         /*
    1031                 :            :                          * The check near the loop beginning would have updated
    1032                 :            :                          * next_skip_pfn too, but this is a bit simpler.
    1033                 :            :                          */
    1034                 :          0 :                         next_skip_pfn += 1UL << cc->order;
    1035                 :            :                 }
    1036                 :            :         }
    1037                 :            : 
    1038                 :            :         /*
    1039                 :            :          * The PageBuddy() check could have potentially brought us outside
    1040                 :            :          * the range to be scanned.
    1041                 :            :          */
    1042                 :          3 :         if (unlikely(low_pfn > end_pfn))
    1043                 :            :                 low_pfn = end_pfn;
    1044                 :            : 
    1045                 :            : isolate_abort:
    1046                 :          3 :         if (locked)
    1047                 :          0 :                 spin_unlock_irqrestore(&pgdat->lru_lock, flags);
    1048                 :            : 
    1049                 :            :         /*
    1050                 :            :          * Updated the cached scanner pfn once the pageblock has been scanned
    1051                 :            :          * Pages will either be migrated in which case there is no point
    1052                 :            :          * scanning in the near future or migration failed in which case the
    1053                 :            :          * failure reason may persist. The block is marked for skipping if
    1054                 :            :          * there were no pages isolated in the block or if the block is
    1055                 :            :          * rescanned twice in a row.
    1056                 :            :          */
    1057                 :          3 :         if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
    1058                 :          3 :                 if (valid_page && !skip_updated)
    1059                 :          3 :                         set_pageblock_skip(valid_page);
    1060                 :          3 :                 update_cached_migrate(cc, low_pfn);
    1061                 :            :         }
    1062                 :            : 
    1063                 :          3 :         trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
    1064                 :            :                                                 nr_scanned, nr_isolated);
    1065                 :            : 
    1066                 :            : fatal_pending:
    1067                 :          3 :         cc->total_migrate_scanned += nr_scanned;
    1068                 :          3 :         if (nr_isolated)
    1069                 :            :                 count_compact_events(COMPACTISOLATED, nr_isolated);
    1070                 :            : 
    1071                 :          3 :         return low_pfn;
    1072                 :            : }
    1073                 :            : 
    1074                 :            : /**
    1075                 :            :  * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
    1076                 :            :  * @cc:        Compaction control structure.
    1077                 :            :  * @start_pfn: The first PFN to start isolating.
    1078                 :            :  * @end_pfn:   The one-past-last PFN.
    1079                 :            :  *
                         :            :  * Walks the range one pageblock at a time and hands each piece to
                         :            :  * isolate_migratepages_block() with ISOLATE_UNEVICTABLE. Blocks for which
                         :            :  * pageblock_pfn_to_page() returns NULL are stepped over. The walk stops
                         :            :  * early once cc->nr_migratepages reaches COMPACT_CLUSTER_MAX.
                         :            :  *
    1080                 :            :  * Returns zero if isolation fails fatally due to e.g. pending signal.
    1081                 :            :  * Otherwise, the function returns the one-past-the-last PFN of isolated page
    1082                 :            :  * (which may be greater than end_pfn if end fell in the middle of a THP page).
    1083                 :            :  */
    1084                 :            : unsigned long
    1085                 :          3 : isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
    1086                 :            :                                                         unsigned long end_pfn)
    1087                 :            : {
    1088                 :            :         unsigned long pfn, block_start_pfn, block_end_pfn;
    1089                 :            : 
    1090                 :            :         /*
                         :            :          * Scan block by block. First and last block may be incomplete:
                         :            :          * the first block start is clamped to the zone start here, and
                         :            :          * each block end is clamped to end_pfn inside the loop.
                         :            :          */
    1091                 :            :         pfn = start_pfn;
    1092                 :          3 :         block_start_pfn = pageblock_start_pfn(pfn);
    1093                 :          3 :         if (block_start_pfn < cc->zone->zone_start_pfn)
    1094                 :            :                 block_start_pfn = cc->zone->zone_start_pfn;
    1095                 :          3 :         block_end_pfn = pageblock_end_pfn(pfn);
    1096                 :            : 
    1097                 :          3 :         for (; pfn < end_pfn; pfn = block_end_pfn,
    1098                 :            :                                 block_start_pfn = block_end_pfn,
    1099                 :          3 :                                 block_end_pfn += pageblock_nr_pages) {
    1100                 :            : 
    1101                 :          3 :                 block_end_pfn = min(block_end_pfn, end_pfn);
    1102                 :            : 
    1103                 :          3 :                 if (!pageblock_pfn_to_page(block_start_pfn,
    1104                 :            :                                         block_end_pfn, cc->zone))
    1105                 :          0 :                         continue;
    1106                 :            : 
    1107                 :          3 :                 pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
    1108                 :            :                                                         ISOLATE_UNEVICTABLE);
    1109                 :            :                 /* Zero means the block scan aborted on a fatal signal */
    1110                 :          3 :                 if (!pfn)
    1111                 :            :                         break;
    1112                 :            :                 /* A full batch has been isolated; stop scanning here */
    1113                 :          3 :                 if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
    1114                 :            :                         break;
    1115                 :            :         }
    1116                 :            : 
    1117                 :          3 :         return pfn;
    1118                 :            : }
    1119                 :            : 
    1120                 :            : #endif /* CONFIG_COMPACTION || CONFIG_CMA */
    1121                 :            : #ifdef CONFIG_COMPACTION
    1122                 :            : /*
                         :            :  * Decide whether the pageblock of @page is acceptable as a source of
                         :            :  * pages to migrate.
                         :            :  *
                         :            :  * Returns false when pageblock_skip_persistent() is true for the page.
                         :            :  * Otherwise every block is accepted unless this is async direct
                         :            :  * compaction, in which case the block's migratetype must match
                         :            :  * cc->migratetype; a MIGRATE_MOVABLE request accepts any block for which
                         :            :  * is_migrate_movable() holds.
                         :            :  */
    1123                 :          0 : static bool suitable_migration_source(struct compact_control *cc,
    1124                 :            :                                                         struct page *page)
    1125                 :            : {
    1126                 :            :         int block_mt;
    1127                 :            : 
    1128                 :          0 :         if (pageblock_skip_persistent(page))
    1129                 :            :                 return false;
    1130                 :            :         /* Only async direct compaction is restricted by migratetype */
    1131                 :          0 :         if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
    1132                 :            :                 return true;
    1133                 :            : 
    1134                 :          0 :         block_mt = get_pageblock_migratetype(page);
    1135                 :            : 
    1136                 :          0 :         if (cc->migratetype == MIGRATE_MOVABLE)
    1137                 :          0 :                 return is_migrate_movable(block_mt);
    1138                 :            :         else
    1139                 :          0 :                 return block_mt == cc->migratetype;
    1140                 :            : }
    1141                 :            : 
    1142                 :            : /*
                         :            :  * Returns true if the page is within a block suitable for migration to.
                         :            :  *
                         :            :  * A free (buddy) page whose order is at least pageblock_order rules the
                         :            :  * block out. Otherwise the block is accepted when
                         :            :  * cc->ignore_block_suitable is set or when its migratetype satisfies
                         :            :  * is_migrate_movable().
                         :            :  */
    1143                 :          0 : static bool suitable_migration_target(struct compact_control *cc,
    1144                 :            :                                                         struct page *page)
    1145                 :            : {
    1146                 :            :         /* If the page is a large free page, then disallow migration */
    1147                 :          0 :         if (PageBuddy(page)) {
    1148                 :            :                 /*
    1149                 :            :                  * We are checking page_order without zone->lock taken. But
    1150                 :            :                  * the only small danger is that we skip a potentially suitable
    1151                 :            :                  * pageblock, so it's not worth checking the order for a valid
                         :            :                  * range.
    1152                 :            :                  */
    1153                 :          0 :                 if (page_order_unsafe(page) >= pageblock_order)
    1154                 :            :                         return false;
    1155                 :            :         }
    1156                 :            :         /* The migratetype check below is bypassed on request */
    1157                 :          0 :         if (cc->ignore_block_suitable)
    1158                 :            :                 return true;
    1159                 :            : 
    1160                 :            :         /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
    1161                 :          0 :         if (is_migrate_movable(get_pageblock_migratetype(page)))
    1162                 :            :                 return true;
    1163                 :            : 
    1164                 :            :         /* Otherwise skip the block */
    1165                 :          0 :         return false;
    1166                 :            : }
    1167                 :            : /*
                         :            :  * Compute a scan budget from COMPACT_CLUSTER_MAX: the value is shifted
                         :            :  * right once per count in cc->fast_search_fail, and the final "+ 1"
                         :            :  * keeps the result at least 1. fast_isolate_freepages() derives its
                         :            :  * own limit from this value.
                         :            :  */
    1168                 :            : static inline unsigned int
    1169                 :            : freelist_scan_limit(struct compact_control *cc)
    1170                 :            : {
    1171                 :            :         unsigned short shift = BITS_PER_LONG - 1;
    1172                 :            :         /* Cap the shift count so it stays below the width of a long */
    1173                 :          0 :         return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1;
    1174                 :            : }
    1175                 :            : 
    1176                 :            : /*
    1177                 :            :  * Test whether the free scanner has reached the same or lower pageblock than
    1178                 :            :  * the migration scanner, and compaction should thus terminate.
                         :            :  *
                         :            :  * Both positions are compared at pageblock granularity: cc->free_pfn and
                         :            :  * cc->migrate_pfn are each shifted right by pageblock_order first, so two
                         :            :  * pfns inside the same pageblock count as "met".
    1179                 :            :  */
    1180                 :            : static inline bool compact_scanners_met(struct compact_control *cc)
    1181                 :            : {
    1182                 :          0 :         return (cc->free_pfn >> pageblock_order)
    1183                 :          0 :                 <= (cc->migrate_pfn >> pageblock_order);
    1184                 :            : }
    1185                 :            : 
    1186                 :            : /*
    1187                 :            :  * Used when scanning for a suitable migration target which scans freelists
    1188                 :            :  * in reverse. Reorders the list such that the unscanned pages are scanned
    1189                 :            :  * first on the next iteration of the free scanner.
                         :            :  *
                         :            :  * list_cut_before() detaches every entry in front of @freepage into a
                         :            :  * temporary list and list_splice_tail() re-appends them at the end, which
                         :            :  * leaves @freepage as the first entry of @freelist.
                         :            :  *
                         :            :  * NOTE(review): list_is_last() is called as (head, entry) rather than
                         :            :  * (entry, head). With the stock <linux/list.h> helper this evaluates
                         :            :  * freelist->next == &freepage->lru, i.e. "freepage is already first",
                         :            :  * which matches the intent but reads backwards - confirm.
    1190                 :            :  */
    1191                 :            : static void
    1192                 :          0 : move_freelist_head(struct list_head *freelist, struct page *freepage)
    1193                 :            : {
    1194                 :          0 :         LIST_HEAD(sublist);
    1195                 :            : 
    1196                 :          0 :         if (!list_is_last(freelist, &freepage->lru)) {
    1197                 :            :                 list_cut_before(&sublist, freelist, &freepage->lru);
    1198                 :          0 :                 if (!list_empty(&sublist))
    1199                 :            :                         list_splice_tail(&sublist, freelist);
    1200                 :            :         }
    1201                 :          0 : }
    1202                 :            : 
    1203                 :            : /*
    1204                 :            :  * Similar to move_freelist_head except used by the migration scanner
    1205                 :            :  * when scanning forward. It's possible for these list operations to
    1206                 :            :  * move against each other if they search the free list exactly in
    1207                 :            :  * lockstep.
                         :            :  *
                         :            :  * list_cut_position() detaches every entry up to and including @freepage
                         :            :  * and list_splice_tail() re-appends them at the end, which leaves
                         :            :  * @freepage as the last entry of @freelist.
                         :            :  *
                         :            :  * NOTE(review): list_is_first() is called as (head, entry) rather than
                         :            :  * (entry, head). With the stock <linux/list.h> helper this evaluates
                         :            :  * freelist->prev == &freepage->lru, i.e. "freepage is already last",
                         :            :  * which matches the intent but reads backwards - confirm.
    1208                 :            :  */
    1209                 :            : static void
    1210                 :          0 : move_freelist_tail(struct list_head *freelist, struct page *freepage)
    1211                 :            : {
    1212                 :          0 :         LIST_HEAD(sublist);
    1213                 :            : 
    1214                 :          0 :         if (!list_is_first(freelist, &freepage->lru)) {
    1215                 :          0 :                 list_cut_position(&sublist, freelist, &freepage->lru);
    1216                 :          0 :                 if (!list_empty(&sublist))
    1217                 :            :                         list_splice_tail(&sublist, freelist);
    1218                 :            :         }
    1219                 :          0 : }
    1220                 :            : /*
                         :            :  * Top up cc->freepages from the pageblock that contains @pfn.
                         :            :  *
                         :            :  * @cc:          Compaction control structure.
                         :            :  * @pfn:         PFN the surrounding scan is centred on.
                         :            :  * @nr_isolated: The scan after @pfn resumes at @pfn + @nr_isolated
                         :            :  *               (presumably the number of pages the caller already
                         :            :  *               isolated starting at @pfn - confirm against caller).
                         :            :  *
                         :            :  * Nothing is done when cc->nr_freepages already covers
                         :            :  * cc->nr_migratepages, or for async direct compaction. Otherwise the part
                         :            :  * of the pageblock before @pfn is scanned, then the part after it. If
                         :            :  * free pages are still short afterwards the pageblock is marked to be
                         :            :  * skipped.
                         :            :  */
    1221                 :            : static void
    1222                 :          0 : fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
    1223                 :            : {
    1224                 :            :         unsigned long start_pfn, end_pfn;
    1225                 :          0 :         struct page *page = pfn_to_page(pfn);
    1226                 :            : 
    1227                 :            :         /* Do not search around if there are enough pages already */
    1228                 :          0 :         if (cc->nr_freepages >= cc->nr_migratepages)
    1229                 :          0 :                 return;
    1230                 :            : 
    1231                 :            :         /* Minimise scanning during async compaction */
    1232                 :          0 :         if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
    1233                 :            :                 return;
    1234                 :            : 
    1235                 :            :         /*
                         :            :          * Pageblock boundaries, intersected with the zone on both
                         :            :          * sides. A zone may begin in the middle of a pageblock, so the
                         :            :          * start must be clamped just like the end; otherwise the "scan
                         :            :          * before" pass could walk pfns that belong to another zone.
                         :            :          *
                         :            :          * NOTE(review): end_pfn is computed as the last pfn (inclusive)
                         :            :          * - confirm this matches the end convention expected by
                         :            :          * isolate_freepages_block().
                         :            :          */
    1236                 :          0 :         start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
    1237                 :          0 :         end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
    1238                 :            : 
    1239                 :            :         /* Scan before */
    1240                 :          0 :         if (start_pfn != pfn) {
    1241                 :          0 :                 isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
    1242                 :          0 :                 if (cc->nr_freepages >= cc->nr_migratepages)
    1243                 :            :                         return;
    1244                 :            :         }
    1245                 :            : 
    1246                 :            :         /* Scan after */
    1247                 :          0 :         start_pfn = pfn + nr_isolated;
    1248                 :          0 :         if (start_pfn < end_pfn)
    1249                 :          0 :                 isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
    1250                 :            : 
    1251                 :            :         /* Skip this pageblock in the future as it's full or nearly full */
    1252                 :          0 :         if (cc->nr_freepages < cc->nr_migratepages)
    1253                 :          0 :                 set_pageblock_skip(page);
    1254                 :            : }
    1255                 :            : 
    1256                 :            : /* Search orders in round-robin fashion */
    1257                 :            : static int next_search_order(struct compact_control *cc, int order)
    1258                 :            : {
    1259                 :          0 :         order--;
    1260                 :          0 :         if (order < 0)
    1261                 :          0 :                 order = cc->order - 1;
    1262                 :            : 
    1263                 :            :         /* Search wrapped around? */
    1264                 :          0 :         if (order == cc->search_order) {
    1265                 :          0 :                 cc->search_order--;
    1266                 :          0 :                 if (cc->search_order < 0)
    1267                 :          0 :                         cc->search_order = cc->order - 1;
    1268                 :            :                 return -1;
    1269                 :            :         }
    1270                 :            : 
    1271                 :            :         return order;
    1272                 :            : }
    1273                 :            : 
    1274                 :            : static unsigned long
    1275                 :          0 : fast_isolate_freepages(struct compact_control *cc)
    1276                 :            : {
    1277                 :          0 :         unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
    1278                 :            :         unsigned int nr_scanned = 0;
    1279                 :            :         unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
    1280                 :            :         unsigned long nr_isolated = 0;
    1281                 :            :         unsigned long distance;
    1282                 :            :         struct page *page = NULL;
    1283                 :            :         bool scan_start = false;
    1284                 :            :         int order;
    1285                 :            : 
    1286                 :            :         /* Full compaction passes in a negative order */
    1287                 :          0 :         if (cc->order <= 0)
    1288                 :          0 :                 return cc->free_pfn;
    1289                 :            : 
    1290                 :            :         /*
    1291                 :            :          * If starting the scan, use a deeper search and use the highest
    1292                 :            :          * PFN found if a suitable one is not found.
    1293                 :            :          */
    1294                 :          0 :         if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
    1295                 :            :                 limit = pageblock_nr_pages >> 1;
    1296                 :            :                 scan_start = true;
    1297                 :            :         }
    1298                 :            : 
    1299                 :            :         /*
    1300                 :            :          * Preferred point is in the top quarter of the scan space but take
    1301                 :            :          * a pfn from the top half if the search is problematic.
    1302                 :            :          */
    1303                 :          0 :         distance = (cc->free_pfn - cc->migrate_pfn);
    1304                 :          0 :         low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
    1305                 :          0 :         min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
    1306                 :            : 
    1307                 :          0 :         if (WARN_ON_ONCE(min_pfn > low_pfn))
    1308                 :            :                 low_pfn = min_pfn;
    1309                 :            : 
    1310                 :            :         /*
    1311                 :            :          * Search starts from the last successful isolation order or the next
    1312                 :            :          * order to search after a previous failure
    1313                 :            :          */
    1314                 :          0 :         cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
    1315                 :            : 
    1316                 :          0 :         for (order = cc->search_order;
    1317                 :          0 :              !page && order >= 0;
    1318                 :            :              order = next_search_order(cc, order)) {
    1319                 :          0 :                 struct free_area *area = &cc->zone->free_area[order];
    1320                 :            :                 struct list_head *freelist;
    1321                 :            :                 struct page *freepage;
    1322                 :            :                 unsigned long flags;
    1323                 :            :                 unsigned int order_scanned = 0;
    1324                 :            : 
    1325                 :          0 :                 if (!area->nr_free)
    1326                 :          0 :                         continue;
    1327                 :            : 
    1328                 :          0 :                 spin_lock_irqsave(&cc->zone->lock, flags);
    1329                 :          0 :                 freelist = &area->free_list[MIGRATE_MOVABLE];
    1330                 :          0 :                 list_for_each_entry_reverse(freepage, freelist, lru) {
    1331                 :            :                         unsigned long pfn;
    1332                 :            : 
    1333                 :          0 :                         order_scanned++;
    1334                 :          0 :                         nr_scanned++;
    1335                 :          0 :                         pfn = page_to_pfn(freepage);
    1336                 :            : 
    1337                 :          0 :                         if (pfn >= highest)
    1338                 :          0 :                                 highest = pageblock_start_pfn(pfn);
    1339                 :            : 
    1340                 :          0 :                         if (pfn >= low_pfn) {
    1341                 :          0 :                                 cc->fast_search_fail = 0;
    1342                 :          0 :                                 cc->search_order = order;
    1343                 :          0 :                                 page = freepage;
    1344                 :          0 :                                 break;
    1345                 :            :                         }
    1346                 :            : 
    1347                 :          0 :                         if (pfn >= min_pfn && pfn > high_pfn) {
    1348                 :            :                                 high_pfn = pfn;
    1349                 :            : 
    1350                 :            :                                 /* Shorten the scan if a candidate is found */
    1351                 :          0 :                                 limit >>= 1;
    1352                 :            :                         }
    1353                 :            : 
    1354                 :          0 :                         if (order_scanned >= limit)
    1355                 :            :                                 break;
    1356                 :            :                 }
    1357                 :            : 
    1358                 :            :                 /* Use a minimum pfn if a preferred one was not found */
    1359                 :          0 :                 if (!page && high_pfn) {
    1360                 :          0 :                         page = pfn_to_page(high_pfn);
    1361                 :            : 
    1362                 :            :                         /* Update freepage for the list reorder below */
    1363                 :            :                         freepage = page;
    1364                 :            :                 }
    1365                 :            : 
    1366                 :            :                 /* Reorder so a future search skips recent pages */
    1367                 :          0 :                 move_freelist_head(freelist, freepage);
    1368                 :            : 
    1369                 :            :                 /* Isolate the page if available */
    1370                 :          0 :                 if (page) {
    1371                 :          0 :                         if (__isolate_free_page(page, order)) {
    1372                 :          0 :                                 set_page_private(page, order);
    1373                 :          0 :                                 nr_isolated = 1 << order;
    1374                 :          0 :                                 cc->nr_freepages += nr_isolated;
    1375                 :          0 :                                 list_add_tail(&page->lru, &cc->freepages);
    1376                 :            :                                 count_compact_events(COMPACTISOLATED, nr_isolated);
    1377                 :            :                         } else {
    1378                 :            :                                 /* If isolation fails, abort the search */
    1379                 :          0 :                                 order = cc->search_order + 1;
    1380                 :            :                                 page = NULL;
    1381                 :            :                         }
    1382                 :            :                 }
    1383                 :            : 
    1384                 :          0 :                 spin_unlock_irqrestore(&cc->zone->lock, flags);
    1385                 :            : 
    1386                 :            :                 /*
    1387                 :            :                  * Smaller scan on next order so the total scan is related
    1388                 :            :                  * to freelist_scan_limit.
    1389                 :            :                  */
    1390                 :          0 :                 if (order_scanned >= limit)
    1391                 :          0 :                         limit = min(1U, limit >> 1);
    1392                 :            :         }
    1393                 :            : 
    1394                 :          0 :         if (!page) {
    1395                 :          0 :                 cc->fast_search_fail++;
    1396                 :          0 :                 if (scan_start) {
    1397                 :            :                         /*
    1398                 :            :                          * Use the highest PFN found above min. If one was
    1399                 :            :                          * not found, be pessimistic for direct compaction
    1400                 :            :                          * and use the min mark.
    1401                 :            :                          */
    1402                 :          0 :                         if (highest) {
    1403                 :          0 :                                 page = pfn_to_page(highest);
    1404                 :          0 :                                 cc->free_pfn = highest;
    1405                 :            :                         } else {
    1406                 :          0 :                                 if (cc->direct_compaction && pfn_valid(min_pfn)) {
    1407                 :          0 :                                         page = pfn_to_page(min_pfn);
    1408                 :          0 :                                         cc->free_pfn = min_pfn;
    1409                 :            :                                 }
    1410                 :            :                         }
    1411                 :            :                 }
    1412                 :            :         }
    1413                 :            : 
    1414                 :          0 :         if (highest && highest >= cc->zone->compact_cached_free_pfn) {
    1415                 :          0 :                 highest -= pageblock_nr_pages;
    1416                 :          0 :                 cc->zone->compact_cached_free_pfn = highest;
    1417                 :            :         }
    1418                 :            : 
    1419                 :          0 :         cc->total_free_scanned += nr_scanned;
    1420                 :          0 :         if (!page)
    1421                 :          0 :                 return cc->free_pfn;
    1422                 :            : 
    1423                 :          0 :         low_pfn = page_to_pfn(page);
    1424                 :          0 :         fast_isolate_around(cc, low_pfn, nr_isolated);
    1425                 :          0 :         return low_pfn;
    1426                 :            : }
    1427                 :            : 
    1428                 :            : /*
    1429                 :            :  * Based on information in the current compact_control, find blocks
    1430                 :            :  * suitable for isolating free pages from and then isolate them.
    1431                 :            :  */
    1432                 :          0 : static void isolate_freepages(struct compact_control *cc)
    1433                 :            : {
    1434                 :          0 :         struct zone *zone = cc->zone;
    1435                 :            :         struct page *page;
    1436                 :            :         unsigned long block_start_pfn;  /* start of current pageblock */
    1437                 :            :         unsigned long isolate_start_pfn; /* exact pfn we start at */
    1438                 :            :         unsigned long block_end_pfn;    /* end of current pageblock */
    1439                 :            :         unsigned long low_pfn;       /* lowest pfn scanner is able to scan */
    1440                 :          0 :         struct list_head *freelist = &cc->freepages;
    1441                 :            :         unsigned int stride;
    1442                 :            : 
    1443                 :            :         /* Try a small search of the free lists for a candidate */
    1444                 :          0 :         isolate_start_pfn = fast_isolate_freepages(cc);
    1445                 :          0 :         if (cc->nr_freepages)
    1446                 :            :                 goto splitmap;
    1447                 :            : 
    1448                 :            :         /*
    1449                 :            :          * Initialise the free scanner. The starting point is where we last
    1450                 :            :          * successfully isolated from, zone-cached value, or the end of the
    1451                 :            :          * zone when isolating for the first time. For looping we also need
    1452                 :            :          * this pfn aligned down to the pageblock boundary, because we do
    1453                 :            :          * block_start_pfn -= pageblock_nr_pages in the for loop.
    1454                 :            :          * For ending point, take care when isolating in last pageblock of a
    1455                 :            :          * zone which ends in the middle of a pageblock.
    1456                 :            :          * The low boundary is the end of the pageblock the migration scanner
    1457                 :            :          * is using.
    1458                 :            :          */
    1459                 :          0 :         isolate_start_pfn = cc->free_pfn;
    1460                 :          0 :         block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
    1461                 :          0 :         block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
    1462                 :            :                                                 zone_end_pfn(zone));
    1463                 :          0 :         low_pfn = pageblock_end_pfn(cc->migrate_pfn);
    1464                 :          0 :         stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
    1465                 :            : 
    1466                 :            :         /*
    1467                 :            :          * Isolate free pages until enough are available to migrate the
    1468                 :            :          * pages on cc->migratepages. We stop searching if the migrate
    1469                 :            :          * and free page scanners meet or enough free pages are isolated.
    1470                 :            :          */
    1471                 :          0 :         for (; block_start_pfn >= low_pfn;
    1472                 :            :                                 block_end_pfn = block_start_pfn,
    1473                 :          0 :                                 block_start_pfn -= pageblock_nr_pages,
    1474                 :          0 :                                 isolate_start_pfn = block_start_pfn) {
    1475                 :            :                 unsigned long nr_isolated;
    1476                 :            : 
    1477                 :            :                 /*
    1478                 :            :                  * This can iterate a massively long zone without finding any
    1479                 :            :                  * suitable migration targets, so periodically check resched.
    1480                 :            :                  */
    1481                 :          0 :                 if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
    1482                 :          0 :                         cond_resched();
    1483                 :            : 
    1484                 :          0 :                 page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
    1485                 :            :                                                                         zone);
    1486                 :          0 :                 if (!page)
    1487                 :          0 :                         continue;
    1488                 :            : 
    1489                 :            :                 /* Check the block is suitable for migration */
    1490                 :          0 :                 if (!suitable_migration_target(cc, page))
    1491                 :          0 :                         continue;
    1492                 :            : 
    1493                 :            :                 /* If isolation recently failed, do not retry */
    1494                 :          0 :                 if (!isolation_suitable(cc, page))
    1495                 :          0 :                         continue;
    1496                 :            : 
    1497                 :            :                 /* Found a block suitable for isolating free pages from. */
    1498                 :          0 :                 nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
    1499                 :            :                                         block_end_pfn, freelist, stride, false);
    1500                 :            : 
    1501                 :            :                 /* Update the skip hint if the full pageblock was scanned */
    1502                 :          0 :                 if (isolate_start_pfn == block_end_pfn)
    1503                 :          0 :                         update_pageblock_skip(cc, page, block_start_pfn);
    1504                 :            : 
    1505                 :            :                 /* Are enough freepages isolated? */
    1506                 :          0 :                 if (cc->nr_freepages >= cc->nr_migratepages) {
    1507                 :          0 :                         if (isolate_start_pfn >= block_end_pfn) {
    1508                 :            :                                 /*
    1509                 :            :                                  * Restart at previous pageblock if more
    1510                 :            :                                  * freepages can be isolated next time.
    1511                 :            :                                  */
    1512                 :          0 :                                 isolate_start_pfn =
    1513                 :          0 :                                         block_start_pfn - pageblock_nr_pages;
    1514                 :            :                         }
    1515                 :            :                         break;
    1516                 :          0 :                 } else if (isolate_start_pfn < block_end_pfn) {
    1517                 :            :                         /*
    1518                 :            :                          * If isolation failed early, do not continue
    1519                 :            :                          * needlessly.
    1520                 :            :                          */
    1521                 :            :                         break;
    1522                 :            :                 }
    1523                 :            : 
    1524                 :            :                 /* Adjust stride depending on isolation */
    1525                 :          0 :                 if (nr_isolated) {
    1526                 :            :                         stride = 1;
    1527                 :          0 :                         continue;
    1528                 :            :                 }
    1529                 :          0 :                 stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
    1530                 :            :         }
    1531                 :            : 
    1532                 :            :         /*
    1533                 :            :          * Record where the free scanner will restart next time. Either we
    1534                 :            :          * broke from the loop and set isolate_start_pfn based on the last
    1535                 :            :          * call to isolate_freepages_block(), or we met the migration scanner
    1536                 :            :          * and the loop terminated due to isolate_start_pfn < low_pfn
    1537                 :            :          */
    1538                 :          0 :         cc->free_pfn = isolate_start_pfn;
    1539                 :            : 
    1540                 :            : splitmap:
    1541                 :            :         /* __isolate_free_page() does not map the pages */
    1542                 :          0 :         split_map_pages(freelist);
    1543                 :          0 : }
    1544                 :            : 
    1545                 :            : /*
    1546                 :            :  * This is a migrate-callback that "allocates" freepages by taking pages
    1547                 :            :  * from the isolated freelists in the block we are migrating to.
    1548                 :            :  */
    1549                 :          0 : static struct page *compaction_alloc(struct page *migratepage,
    1550                 :            :                                         unsigned long data)
    1551                 :            : {
    1552                 :          0 :         struct compact_control *cc = (struct compact_control *)data;
    1553                 :            :         struct page *freepage;
    1554                 :            : 
    1555                 :          0 :         if (list_empty(&cc->freepages)) {
    1556                 :          0 :                 isolate_freepages(cc);
    1557                 :            : 
    1558                 :          0 :                 if (list_empty(&cc->freepages))
    1559                 :            :                         return NULL;
    1560                 :            :         }
    1561                 :            : 
    1562                 :          0 :         freepage = list_entry(cc->freepages.next, struct page, lru);
    1563                 :            :         list_del(&freepage->lru);
    1564                 :          0 :         cc->nr_freepages--;
    1565                 :            : 
    1566                 :          0 :         return freepage;
    1567                 :            : }
    1568                 :            : 
    1569                 :            : /*
    1570                 :            :  * This is a migrate-callback that "frees" freepages back to the isolated
    1571                 :            :  * freelist.  All pages on the freelist are from the same zone, so there is no
    1572                 :            :  * special handling needed for NUMA.
    1573                 :            :  */
    1574                 :          0 : static void compaction_free(struct page *page, unsigned long data)
    1575                 :            : {
    1576                 :          0 :         struct compact_control *cc = (struct compact_control *)data;
    1577                 :            : 
    1578                 :          0 :         list_add(&page->lru, &cc->freepages);
    1579                 :          0 :         cc->nr_freepages++;
    1580                 :          0 : }
    1581                 :            : 
    1582                 :            : /* possible outcome of isolate_migratepages */
    1583                 :            : typedef enum {
    1584                 :            :         ISOLATE_ABORT,          /* Abort compaction now */
    1585                 :            :         ISOLATE_NONE,           /* No pages isolated, continue scanning */
    1586                 :            :         ISOLATE_SUCCESS,        /* Pages isolated, migrate */
    1587                 :            : } isolate_migrate_t;
    1588                 :            : 
    1589                 :            : /*
    1590                 :            :  * Allow userspace to control policy on scanning the unevictable LRU for
    1591                 :            :  * compactable pages.
    1592                 :            :  */
    1593                 :            : int sysctl_compact_unevictable_allowed __read_mostly = 1;
    1594                 :            : 
    1595                 :            : static inline void
    1596                 :            : update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
    1597                 :            : {
    1598                 :          0 :         if (cc->fast_start_pfn == ULONG_MAX)
    1599                 :            :                 return;
    1600                 :            : 
    1601                 :          0 :         if (!cc->fast_start_pfn)
    1602                 :          0 :                 cc->fast_start_pfn = pfn;
    1603                 :            : 
    1604                 :          0 :         cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
    1605                 :            : }
    1606                 :            : 
    1607                 :            : static inline unsigned long
    1608                 :            : reinit_migrate_pfn(struct compact_control *cc)
    1609                 :            : {
    1610                 :          0 :         if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
    1611                 :            :                 return cc->migrate_pfn;
    1612                 :            : 
    1613                 :          0 :         cc->migrate_pfn = cc->fast_start_pfn;
    1614                 :          0 :         cc->fast_start_pfn = ULONG_MAX;
    1615                 :            : 
    1616                 :            :         return cc->migrate_pfn;
    1617                 :            : }
    1618                 :            : 
    1619                 :            : /*
    1620                 :            :  * Briefly search the free lists for a migration source that already has
    1621                 :            :  * some free pages to reduce the number of pages that need migration
    1622                 :            :  * before a pageblock is free.
    1623                 :            :  */
    1624                 :          0 : static unsigned long fast_find_migrateblock(struct compact_control *cc)
    1625                 :            : {
    1626                 :            :         unsigned int limit = freelist_scan_limit(cc);
    1627                 :            :         unsigned int nr_scanned = 0;
    1628                 :            :         unsigned long distance;
    1629                 :          0 :         unsigned long pfn = cc->migrate_pfn;
    1630                 :            :         unsigned long high_pfn;
    1631                 :            :         int order;
    1632                 :            : 
    1633                 :            :         /* Skip hints are relied on to avoid repeats on the fast search */
    1634                 :          0 :         if (cc->ignore_skip_hint)
    1635                 :            :                 return pfn;
    1636                 :            : 
    1637                 :            :         /*
    1638                 :            :          * If the migrate_pfn is not at the start of a zone or the start
    1639                 :            :          * of a pageblock then assume this is a continuation of a previous
    1640                 :            :          * scan restarted due to COMPACT_CLUSTER_MAX.
    1641                 :            :          */
    1642                 :          0 :         if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
    1643                 :            :                 return pfn;
    1644                 :            : 
    1645                 :            :         /*
    1646                 :            :          * For smaller orders, just linearly scan as the number of pages
    1647                 :            :          * to migrate should be relatively small and does not necessarily
    1648                 :            :          * justify freeing up a large block for a small allocation.
    1649                 :            :          */
    1650                 :          0 :         if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
    1651                 :            :                 return pfn;
    1652                 :            : 
    1653                 :            :         /*
    1654                 :            :          * Only allow kcompactd and direct requests for movable pages to
    1655                 :            :          * quickly clear out a MOVABLE pageblock for allocation. This
    1656                 :            :          * reduces the risk that a large movable pageblock is freed for
    1657                 :            :          * an unmovable/reclaimable small allocation.
    1658                 :            :          */
    1659                 :          0 :         if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
    1660                 :            :                 return pfn;
    1661                 :            : 
    1662                 :            :         /*
    1663                 :            :          * When starting the migration scanner, pick any pageblock within the
    1664                 :            :          * first half of the search space. Otherwise try and pick a pageblock
    1665                 :            :          * within the first eighth to reduce the chances that a migration
    1666                 :            :          * target later becomes a source.
    1667                 :            :          */
    1668                 :          0 :         distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
    1669                 :          0 :         if (cc->migrate_pfn != cc->zone->zone_start_pfn)
    1670                 :          0 :                 distance >>= 2;
    1671                 :          0 :         high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
    1672                 :            : 
    1673                 :          0 :         for (order = cc->order - 1;
    1674                 :          0 :              order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
    1675                 :          0 :              order--) {
    1676                 :          0 :                 struct free_area *area = &cc->zone->free_area[order];
    1677                 :            :                 struct list_head *freelist;
    1678                 :            :                 unsigned long flags;
    1679                 :            :                 struct page *freepage;
    1680                 :            : 
    1681                 :          0 :                 if (!area->nr_free)
    1682                 :          0 :                         continue;
    1683                 :            : 
    1684                 :          0 :                 spin_lock_irqsave(&cc->zone->lock, flags);
    1685                 :          0 :                 freelist = &area->free_list[MIGRATE_MOVABLE];
    1686                 :          0 :                 list_for_each_entry(freepage, freelist, lru) {
    1687                 :            :                         unsigned long free_pfn;
    1688                 :            : 
    1689                 :          0 :                         nr_scanned++;
    1690                 :          0 :                         free_pfn = page_to_pfn(freepage);
    1691                 :          0 :                         if (free_pfn < high_pfn) {
    1692                 :            :                                 /*
    1693                 :            :                                  * Avoid if skipped recently. Ideally it would
    1694                 :            :                                  * move to the tail but even safe iteration of
    1695                 :            :                                  * the list assumes an entry is deleted, not
    1696                 :            :                                  * reordered.
    1697                 :            :                                  */
    1698                 :          0 :                                 if (get_pageblock_skip(freepage)) {
    1699                 :          0 :                                         if (list_is_last(freelist, &freepage->lru))
    1700                 :            :                                                 break;
    1701                 :            : 
    1702                 :          0 :                                         continue;
    1703                 :            :                                 }
    1704                 :            : 
    1705                 :            :                                 /* Reorder so that a future search skips recent pages */
    1706                 :          0 :                                 move_freelist_tail(freelist, freepage);
    1707                 :            : 
    1708                 :            :                                 update_fast_start_pfn(cc, free_pfn);
    1709                 :          0 :                                 pfn = pageblock_start_pfn(free_pfn);
    1710                 :          0 :                                 cc->fast_search_fail = 0;
    1711                 :          0 :                                 set_pageblock_skip(freepage);
    1712                 :          0 :                                 break;
    1713                 :            :                         }
    1714                 :            : 
    1715                 :          0 :                         if (nr_scanned >= limit) {
    1716                 :          0 :                                 cc->fast_search_fail++;
    1717                 :          0 :                                 move_freelist_tail(freelist, freepage);
    1718                 :          0 :                                 break;
    1719                 :            :                         }
    1720                 :            :                 }
    1721                 :          0 :                 spin_unlock_irqrestore(&cc->zone->lock, flags);
    1722                 :            :         }
    1723                 :            : 
    1724                 :          0 :         cc->total_migrate_scanned += nr_scanned;
    1725                 :            : 
    1726                 :            :         /*
    1727                 :            :          * If fast scanning failed then use a cached entry for a page block
    1728                 :            :          * that had free pages as the basis for starting a linear scan.
    1729                 :            :          */
    1730                 :          0 :         if (pfn == cc->migrate_pfn)
    1731                 :            :                 pfn = reinit_migrate_pfn(cc);
    1732                 :            : 
    1733                 :          0 :         return pfn;
    1734                 :            : }
    1735                 :            : 
    1736                 :            : /*
    1737                 :            :  * Isolate all pages that can be migrated from the first suitable block,
    1738                 :            :  * starting at the block pointed to by the migrate scanner pfn within
    1739                 :            :  * compact_control.
                                         *
                                         * The starting pfn is obtained from fast_find_migrateblock(). Unless the
                                         * function aborts, cc->migrate_pfn is updated to the pfn at which the
                                         * scan stopped so that the next call resumes from there.
                                         *
                                         * Returns ISOLATE_ABORT if isolate_migratepages_block() returned 0,
                                         * ISOLATE_SUCCESS if cc->nr_migratepages is non-zero afterwards and
                                         * ISOLATE_NONE otherwise.
    1740                 :            :  */
    1741                 :          0 : static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
    1742                 :            : {
    1743                 :            :         unsigned long block_start_pfn;
    1744                 :            :         unsigned long block_end_pfn;
    1745                 :            :         unsigned long low_pfn;
    1746                 :            :         struct page *page;
                                                /*
                                                 * ISOLATE_UNEVICTABLE is set only when the
                                                 * sysctl_compact_unevictable_allowed knob is non-zero;
                                                 * ISOLATE_ASYNC_MIGRATE is set for every mode except MIGRATE_SYNC.
                                                 */
    1747                 :          0 :         const isolate_mode_t isolate_mode =
    1748                 :          0 :                 (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
    1749                 :          0 :                 (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
    1750                 :            :         bool fast_find_block;
    1751                 :            : 
    1752                 :            :         /*
    1753                 :            :          * Start at where we last stopped, or beginning of the zone as
    1754                 :            :          * initialized by compact_zone(). The first failure will use
    1755                 :            :          * the lowest PFN as the starting point for linear scanning.
    1756                 :            :          */
    1757                 :          0 :         low_pfn = fast_find_migrateblock(cc);
    1758                 :          0 :         block_start_pfn = pageblock_start_pfn(low_pfn);
                                                /* Never let the block start fall below the first pfn of the zone. */
    1759                 :          0 :         if (block_start_pfn < cc->zone->zone_start_pfn)
    1760                 :            :                 block_start_pfn = cc->zone->zone_start_pfn;
    1761                 :            : 
    1762                 :            :         /*
    1763                 :            :          * fast_find_migrateblock marks a pageblock skipped so to avoid
    1764                 :            :          * the isolation_suitable check below, check whether the fast
    1765                 :            :          * search was successful.
                                                 * "Successful" here means the returned pfn differs from
                                                 * cc->migrate_pfn and cc->fast_search_fail is zero.
    1766                 :            :          */
    1767                 :          0 :         fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
    1768                 :            : 
    1769                 :            :         /* Only scan within a pageblock boundary */
    1770                 :          0 :         block_end_pfn = pageblock_end_pfn(low_pfn);
    1771                 :            : 
    1772                 :            :         /*
    1773                 :            :          * Iterate over whole pageblocks until we find the first suitable.
    1774                 :            :          * Do not cross the free scanner.
                                                 * Every step of the loop clears fast_find_block and moves low_pfn,
                                                 * block_start_pfn and block_end_pfn forward by one pageblock, so
                                                 * the fast-search exemption only ever applies to the first block.
    1775                 :            :          */
    1776                 :          0 :         for (; block_end_pfn <= cc->free_pfn;
    1777                 :            :                         fast_find_block = false,
    1778                 :            :                         low_pfn = block_end_pfn,
    1779                 :            :                         block_start_pfn = block_end_pfn,
    1780                 :          0 :                         block_end_pfn += pageblock_nr_pages) {
    1781                 :            : 
    1782                 :            :                 /*
    1783                 :            :                  * This can potentially iterate a massively long zone with
    1784                 :            :                  * many pageblocks unsuitable, so periodically check if we
    1785                 :            :                  * need to schedule.
    1786                 :            :                  */
    1787                 :          0 :                 if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
    1788                 :          0 :                         cond_resched();
    1789                 :            : 
    1790                 :          0 :                 page = pageblock_pfn_to_page(block_start_pfn,
    1791                 :            :                                                 block_end_pfn, cc->zone);
                                                        /* pageblock_pfn_to_page() gave no page for this block: skip it. */
    1792                 :          0 :                 if (!page)
    1793                 :          0 :                         continue;
    1794                 :            : 
    1795                 :            :                 /*
    1796                 :            :                  * If isolation recently failed, do not retry. Only check the
    1797                 :            :                  * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock
    1798                 :            :                  * to be visited multiple times. Assume skip was checked
    1799                 :            :                  * before making it "skip" so other compaction instances do
    1800                 :            :                  * not scan the same block.
    1801                 :            :                  */
    1802                 :          0 :                 if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
    1803                 :          0 :                     !fast_find_block && !isolation_suitable(cc, page))
    1804                 :          0 :                         continue;
    1805                 :            : 
    1806                 :            :                 /*
    1807                 :            :                  * For async compaction, also only scan in MOVABLE blocks
    1808                 :            :                  * without huge pages. Async compaction is optimistic to see
    1809                 :            :                  * if the minimum amount of work satisfies the allocation.
    1810                 :            :                  * The cached PFN is updated as it's possible that all
    1811                 :            :                  * remaining blocks between source and target are unsuitable
    1812                 :            :                  * and the compaction scanners fail to meet.
    1813                 :            :                  */
    1814                 :          0 :                 if (!suitable_migration_source(cc, page)) {
    1815                 :          0 :                         update_cached_migrate(cc, block_end_pfn);
    1816                 :          0 :                         continue;
    1817                 :            :                 }
    1818                 :            : 
    1819                 :            :                 /* Perform the isolation */
    1820                 :          0 :                 low_pfn = isolate_migratepages_block(cc, low_pfn,
    1821                 :            :                                                 block_end_pfn, isolate_mode);
    1822                 :            : 
                                                        /* A zero pfn is treated as an abort; cc->migrate_pfn is left untouched. */
    1823                 :          0 :                 if (!low_pfn)
    1824                 :            :                         return ISOLATE_ABORT;
    1825                 :            : 
    1826                 :            :                 /*
    1827                 :            :                  * Either we isolated something and proceed with migration. Or
    1828                 :            :                  * we failed and compact_zone should decide if we should
    1829                 :            :                  * continue or not.
    1830                 :            :                  */
    1831                 :            :                 break;
    1832                 :            :         }
    1833                 :            : 
    1834                 :            :         /* Record where migration scanner will be restarted. */
    1835                 :          0 :         cc->migrate_pfn = low_pfn;
    1836                 :            : 
    1837                 :          0 :         return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
    1838                 :            : }
    1839                 :            : 
    1840                 :            : /*
    1841                 :            :  * order == -1 is expected when compacting via
    1842                 :            :  * /proc/sys/vm/compact_memory
                                         *
                                         * Returns true when @order is exactly -1 and false for any other value.
    1843                 :            :  */
    1844                 :            : static inline bool is_via_compact_memory(int order)
    1845                 :            : {
    1846                 :            :         return order == -1;
    1847                 :            : }
    1848                 :            : 
                                        /*
                                         * Decide whether the compaction run described by @cc should stop.
                                         *
                                         * Returns
                                         *   COMPACT_COMPLETE / COMPACT_PARTIAL_SKIPPED - the scanners have met;
                                         *       which one depends on cc->whole_zone
                                         *   COMPACT_CONTINUE - the request came via compact_memory (order == -1),
                                         *       or cc->migrate_pfn is not pageblock aligned yet
                                         *   COMPACT_SUCCESS - a free area of order >= cc->order holds a page on
                                         *       the requested migratetype list, on the CMA list (movable
                                         *       requests with CONFIG_CMA), or on a list accepted by
                                         *       find_suitable_fallback()
                                         *   COMPACT_CONTENDED - nothing suitable was found and cc->contended is
                                         *       set or a fatal signal is pending
                                         *   COMPACT_NO_SUITABLE_PAGE - nothing suitable was found otherwise
                                         */
    1849                 :          0 : static enum compact_result __compact_finished(struct compact_control *cc)
    1850                 :            : {
    1851                 :            :         unsigned int order;
    1852                 :          0 :         const int migratetype = cc->migratetype;
    1853                 :            :         int ret;
    1854                 :            : 
    1855                 :            :         /* Compaction run completes if the migrate and free scanner meet */
    1856                 :          0 :         if (compact_scanners_met(cc)) {
    1857                 :            :                 /* Let the next compaction start anew. */
    1858                 :          0 :                 reset_cached_positions(cc->zone);
    1859                 :            : 
    1860                 :            :                 /*
    1861                 :            :                  * Mark that the PG_migrate_skip information should be cleared
    1862                 :            :                  * by kswapd when it goes to sleep. kcompactd does not set the
    1863                 :            :                  * flag itself as the decision to be clear should be directly
    1864                 :            :                  * based on an allocation request.
    1865                 :            :                  */
    1866                 :          0 :                 if (cc->direct_compaction)
    1867                 :          0 :                         cc->zone->compact_blockskip_flush = true;
    1868                 :            : 
    1869                 :          0 :                 if (cc->whole_zone)
    1870                 :            :                         return COMPACT_COMPLETE;
    1871                 :            :                 else
    1872                 :          0 :                         return COMPACT_PARTIAL_SKIPPED;
    1873                 :            :         }
    1874                 :            : 
                                                /* A compact_memory request has no target order: keep going. */
    1875                 :          0 :         if (is_via_compact_memory(cc->order))
    1876                 :            :                 return COMPACT_CONTINUE;
    1877                 :            : 
    1878                 :            :         /*
    1879                 :            :          * Always finish scanning a pageblock to reduce the possibility of
    1880                 :            :          * fallbacks in the future. This is particularly important when
    1881                 :            :          * migration source is unmovable/reclaimable but it's not worth
    1882                 :            :          * special casing.
    1883                 :            :          */
    1884                 :          0 :         if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
    1885                 :            :                 return COMPACT_CONTINUE;
    1886                 :            : 
    1887                 :            :         /* Direct compactor: Is a suitable page free? */
    1888                 :            :         ret = COMPACT_NO_SUITABLE_PAGE;
    1889                 :          0 :         for (order = cc->order; order < MAX_ORDER; order++) {
    1890                 :          0 :                 struct free_area *area = &cc->zone->free_area[order];
    1891                 :            :                 bool can_steal;
    1892                 :            : 
    1893                 :            :                 /* Job done if page is free of the right migratetype */
    1894                 :          0 :                 if (!free_area_empty(area, migratetype))
    1895                 :          0 :                         return COMPACT_SUCCESS;
    1896                 :            : 
    1897                 :            : #ifdef CONFIG_CMA
    1898                 :            :                 /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
    1899                 :          0 :                 if (migratetype == MIGRATE_MOVABLE &&
    1900                 :            :                         !free_area_empty(area, MIGRATE_CMA))
    1901                 :            :                         return COMPACT_SUCCESS;
    1902                 :            : #endif
    1903                 :            :                 /*
    1904                 :            :                  * Job done if allocation would steal freepages from
    1905                 :            :                  * other migratetype buddy lists.
    1906                 :            :                  */
    1907                 :          0 :                 if (find_suitable_fallback(area, order, migratetype,
    1908                 :            :                                                 true, &can_steal) != -1) {
    1909                 :            : 
    1910                 :            :                         /* movable pages are OK in any pageblock */
    1911                 :          0 :                         if (migratetype == MIGRATE_MOVABLE)
    1912                 :            :                                 return COMPACT_SUCCESS;
    1913                 :            : 
    1914                 :            :                         /*
    1915                 :            :                          * We are stealing for a non-movable allocation. Make
    1916                 :            :                          * sure we finish compacting the current pageblock
    1917                 :            :                          * first so it is as free as possible and we won't
    1918                 :            :                          * have to steal another one soon. This only applies
    1919                 :            :                          * to sync compaction, as async compaction operates
    1920                 :            :                          * on pageblocks of the same migratetype.
                                                                 *
                                                                 * NOTE(review): cc->migrate_pfn was already found to be
                                                                 * pageblock aligned by the early return above and is not
                                                                 * modified in between, so the IS_ALIGNED() test below
                                                                 * looks always true and the COMPACT_CONTINUE fall-through
                                                                 * appears unreachable - confirm before relying on it.
    1921                 :            :                          */
    1922                 :          0 :                         if (cc->mode == MIGRATE_ASYNC ||
    1923                 :          0 :                                         IS_ALIGNED(cc->migrate_pfn,
    1924                 :            :                                                         pageblock_nr_pages)) {
    1925                 :            :                                 return COMPACT_SUCCESS;
    1926                 :            :                         }
    1927                 :            : 
    1928                 :            :                         ret = COMPACT_CONTINUE;
    1929                 :          0 :                         break;
    1930                 :            :                 }
    1931                 :            :         }
    1932                 :            : 
                                                /* Only reached when no suitable page was found above. */
    1933                 :          0 :         if (cc->contended || fatal_signal_pending(current))
    1934                 :            :                 ret = COMPACT_CONTENDED;
    1935                 :            : 
    1936                 :          0 :         return ret;
    1937                 :            : }
    1938                 :            : 
                                        /*
                                         * Tracing wrapper around __compact_finished().
                                         *
                                         * The raw result is passed to the mm_compaction_finished tracepoint first;
                                         * afterwards COMPACT_NO_SUITABLE_PAGE is reported to the caller as
                                         * COMPACT_CONTINUE. Every other result is returned unchanged.
                                         */
    1939                 :          0 : static enum compact_result compact_finished(struct compact_control *cc)
    1940                 :            : {
    1941                 :            :         int ret;
    1942                 :            : 
    1943                 :          0 :         ret = __compact_finished(cc);
    1944                 :          0 :         trace_mm_compaction_finished(cc->zone, cc->order, ret);
                                                /* Callers only see COMPACT_CONTINUE; the finer value exists for tracing. */
    1945                 :          0 :         if (ret == COMPACT_NO_SUITABLE_PAGE)
    1946                 :            :                 ret = COMPACT_CONTINUE;
    1947                 :            : 
    1948                 :          0 :         return ret;
    1949                 :            : }
    1950                 :            : 
    1951                 :            : /*
    1952                 :            :  * compaction_suitable: Is this suitable to run compaction on this zone now?
    1953                 :            :  * Returns
    1954                 :            :  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
    1955                 :            :  *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
    1956                 :            :  *   COMPACT_CONTINUE - If compaction should run now
                                         *
                                         * A request with order == -1 (see is_via_compact_memory()) always gets
                                         * COMPACT_CONTINUE without any watermark check.
                                         *
                                         * @wmark_target is forwarded as the last argument of
                                         * __zone_watermark_ok() for the order-0 check. The callers visible in
                                         * this file pass the zone's NR_FREE_PAGES count, either on its own or
                                         * increased by reclaimable pages divided by the order.
    1957                 :            :  */
    1958                 :          0 : static enum compact_result __compaction_suitable(struct zone *zone, int order,
    1959                 :            :                                         unsigned int alloc_flags,
    1960                 :            :                                         int classzone_idx,
    1961                 :            :                                         unsigned long wmark_target)
    1962                 :            : {
    1963                 :            :         unsigned long watermark;
    1964                 :            : 
    1965                 :          0 :         if (is_via_compact_memory(order))
    1966                 :            :                 return COMPACT_CONTINUE;
    1967                 :            : 
                                                /* Watermark selected by the caller's alloc_flags. */
    1968                 :          0 :         watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
    1969                 :            :         /*
    1970                 :            :          * If watermarks for high-order allocation are already met, there
    1971                 :            :          * should be no need for compaction at all.
    1972                 :            :          */
    1973                 :          0 :         if (zone_watermark_ok(zone, order, watermark, classzone_idx,
    1974                 :            :                                                                 alloc_flags))
    1975                 :            :                 return COMPACT_SUCCESS;
    1976                 :            : 
    1977                 :            :         /*
    1978                 :            :          * Watermarks for order-0 must be met for compaction to be able to
    1979                 :            :          * isolate free pages for migration targets. This means that the
    1980                 :            :          * watermark and alloc_flags have to match, or be more pessimistic than
    1981                 :            :          * the check in __isolate_free_page(). We don't use the direct
    1982                 :            :          * compactor's alloc_flags, as they are not relevant for freepage
    1983                 :            :          * isolation. We however do use the direct compactor's classzone_idx to
    1984                 :            :          * skip over zones where lowmem reserves would prevent allocation even
    1985                 :            :          * if compaction succeeds.
    1986                 :            :          * For costly orders, we require low watermark instead of min for
    1987                 :            :          * compaction to proceed to increase its chances.
    1988                 :            :          * ALLOC_CMA is used, as pages in CMA pageblocks are considered
    1989                 :            :          * suitable migration targets
                                                 * The value of compact_gap(order) is added on top of the chosen
                                                 * watermark before the check.
    1990                 :            :          */
    1991                 :            :         watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
    1992                 :          0 :                                 low_wmark_pages(zone) : min_wmark_pages(zone);
    1993                 :          0 :         watermark += compact_gap(order);
    1994                 :          0 :         if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
    1995                 :            :                                                 ALLOC_CMA, wmark_target))
    1996                 :            :                 return COMPACT_SKIPPED;
    1997                 :            : 
    1998                 :          0 :         return COMPACT_CONTINUE;
    1999                 :            : }
    2000                 :            : 
                                        /*
                                         * Public entry point: is it worth running compaction on @zone for an
                                         * allocation of @order?
                                         *
                                         * Runs __compaction_suitable() against the zone's current NR_FREE_PAGES
                                         * count and, for orders above PAGE_ALLOC_COSTLY_ORDER only, additionally
                                         * consults the fragmentation index. The result handed to the
                                         * mm_compaction_suitable tracepoint may be COMPACT_NOT_SUITABLE_ZONE;
                                         * the caller sees that case as COMPACT_SKIPPED.
                                         */
    2001                 :          0 : enum compact_result compaction_suitable(struct zone *zone, int order,
    2002                 :            :                                         unsigned int alloc_flags,
    2003                 :            :                                         int classzone_idx)
    2004                 :            : {
    2005                 :            :         enum compact_result ret;
    2006                 :            :         int fragindex;
    2007                 :            : 
    2008                 :          0 :         ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
    2009                 :            :                                     zone_page_state(zone, NR_FREE_PAGES));
    2010                 :            :         /*
    2011                 :            :          * fragmentation index determines if allocation failures are due to
    2012                 :            :          * low memory or external fragmentation
    2013                 :            :          *
    2014                 :            :          * index of -1000 would imply allocations might succeed depending on
    2015                 :            :          * watermarks, but we already failed the high-order watermark check
    2016                 :            :          * index towards 0 implies failure is due to lack of memory
    2017                 :            :          * index towards 1000 implies failure is due to fragmentation
    2018                 :            :          *
    2019                 :            :          * Only compact if a failure would be due to fragmentation. Also
    2020                 :            :          * ignore fragindex for non-costly orders where the alternative to
    2021                 :            :          * a successful reclaim/compaction is OOM. Fragindex and the
    2022                 :            :          * vm.extfrag_threshold sysctl is meant as a heuristic to prevent
    2023                 :            :          * excessive compaction for costly orders, but it should not be at the
    2024                 :            :          * expense of system stability.
                                                 *
                                                 * Concretely: a non-negative index that does not exceed
                                                 * sysctl_extfrag_threshold turns COMPACT_CONTINUE into
                                                 * COMPACT_NOT_SUITABLE_ZONE.
    2025                 :            :          */
    2026                 :          0 :         if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
    2027                 :          0 :                 fragindex = fragmentation_index(zone, order);
    2028                 :          0 :                 if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
    2029                 :            :                         ret = COMPACT_NOT_SUITABLE_ZONE;
    2030                 :            :         }
    2031                 :            : 
    2032                 :          0 :         trace_mm_compaction_suitable(zone, order, ret);
                                                /* The finer-grained value is for tracing only. */
    2033                 :          0 :         if (ret == COMPACT_NOT_SUITABLE_ZONE)
    2034                 :            :                 ret = COMPACT_SKIPPED;
    2035                 :            : 
    2036                 :          0 :         return ret;
    2037                 :            : }
    2038                 :            : 
                                        /*
                                         * Check whether any zone reachable through @ac could become suitable for
                                         * compaction of @order.
                                         *
                                         * Walks ac->zonelist up to ac->high_zoneidx, filtered by ac->nodemask.
                                         * Returns true as soon as __compaction_suitable() reports anything other
                                         * than COMPACT_SKIPPED for a zone, false if every zone is skipped.
                                         *
                                         * NOTE(review): the reclaimable page count is divided by @order, so this
                                         * presumably must only be called with a non-zero order - confirm against
                                         * the callers.
                                         */
    2039                 :          0 : bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
    2040                 :            :                 int alloc_flags)
    2041                 :            : {
    2042                 :            :         struct zone *zone;
    2043                 :            :         struct zoneref *z;
    2044                 :            : 
    2045                 :            :         /*
    2046                 :            :          * Make sure at least one zone would pass __compaction_suitable if we continue
    2047                 :            :          * retrying the reclaim.
    2048                 :            :          */
    2049                 :          0 :         for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
    2050                 :            :                                         ac->nodemask) {
    2051                 :            :                 unsigned long available;
    2052                 :            :                 enum compact_result compact_result;
    2053                 :            : 
    2054                 :            :                 /*
    2055                 :            :                  * Do not consider all the reclaimable memory because we do not
    2056                 :            :                  * want to thrash just for a single high order allocation which
    2057                 :            :                  * is not even guaranteed to appear even if __compaction_suitable
    2058                 :            :                  * is happy about the watermark check.
                                                         * Only reclaimable pages / order are counted, on top of a
                                                         * snapshot of the zone's free pages.
    2059                 :            :                  */
    2060                 :          0 :                 available = zone_reclaimable_pages(zone) / order;
    2061                 :          0 :                 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
    2062                 :          0 :                 compact_result = __compaction_suitable(zone, order, alloc_flags,
    2063                 :            :                                 ac_classzone_idx(ac), available);
    2064                 :          0 :                 if (compact_result != COMPACT_SKIPPED)
    2065                 :            :                         return true;
    2066                 :            :         }
    2067                 :            : 
    2068                 :            :         return false;
    2069                 :            : }
    2070                 :            : 
    2071                 :            : static enum compact_result
    2072                 :          0 : compact_zone(struct compact_control *cc, struct capture_control *capc)
    2073                 :            : {
    2074                 :            :         enum compact_result ret;
    2075                 :          0 :         unsigned long start_pfn = cc->zone->zone_start_pfn;
    2076                 :            :         unsigned long end_pfn = zone_end_pfn(cc->zone);
    2077                 :            :         unsigned long last_migrated_pfn;
    2078                 :          0 :         const bool sync = cc->mode != MIGRATE_ASYNC;
    2079                 :            :         bool update_cached;
    2080                 :            : 
    2081                 :            :         /*
    2082                 :            :          * These counters track activities during zone compaction.  Initialize
    2083                 :            :          * them before compacting a new zone.
    2084                 :            :          */
    2085                 :          0 :         cc->total_migrate_scanned = 0;
    2086                 :          0 :         cc->total_free_scanned = 0;
    2087                 :          0 :         cc->nr_migratepages = 0;
    2088                 :          0 :         cc->nr_freepages = 0;
    2089                 :          0 :         INIT_LIST_HEAD(&cc->freepages);
    2090                 :          0 :         INIT_LIST_HEAD(&cc->migratepages);
    2091                 :            : 
    2092                 :          0 :         cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
    2093                 :          0 :         ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
    2094                 :            :                                                         cc->classzone_idx);
    2095                 :            :         /* Compaction is not needed or is likely to fail */
    2096                 :          0 :         if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
    2097                 :            :                 return ret;
    2098                 :            : 
    2099                 :            :         /* huh, compaction_suitable is returning something unexpected */
    2100                 :            :         VM_BUG_ON(ret != COMPACT_CONTINUE);
    2101                 :            : 
    2102                 :            :         /*
    2103                 :            :          * Clear pageblock skip if there were failures recently and compaction
    2104                 :            :          * is about to be retried after being deferred.
    2105                 :            :          */
    2106                 :          0 :         if (compaction_restarting(cc->zone, cc->order))
    2107                 :          0 :                 __reset_isolation_suitable(cc->zone);
    2108                 :            : 
    2109                 :            :         /*
    2110                 :            :          * Set up to move all movable pages to the end of the zone. Use cached
    2111                 :            :          * information on where the scanners should start (unless we explicitly
    2112                 :            :          * want to compact the whole zone), but check that it is initialised
    2113                 :            :          * by ensuring the values are within zone boundaries.
    2114                 :            :          */
    2115                 :          0 :         cc->fast_start_pfn = 0;
    2116                 :          0 :         if (cc->whole_zone) {
    2117                 :          0 :                 cc->migrate_pfn = start_pfn;
    2118                 :          0 :                 cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
    2119                 :            :         } else {
    2120                 :          0 :                 cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
    2121                 :          0 :                 cc->free_pfn = cc->zone->compact_cached_free_pfn;
    2122                 :          0 :                 if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
    2123                 :          0 :                         cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
    2124                 :          0 :                         cc->zone->compact_cached_free_pfn = cc->free_pfn;
    2125                 :            :                 }
    2126                 :          0 :                 if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
    2127                 :          0 :                         cc->migrate_pfn = start_pfn;
    2128                 :          0 :                         cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
    2129                 :          0 :                         cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
    2130                 :            :                 }
    2131                 :            : 
    2132                 :          0 :                 if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
    2133                 :          0 :                         cc->whole_zone = true;
    2134                 :            :         }
    2135                 :            : 
    2136                 :            :         last_migrated_pfn = 0;
    2137                 :            : 
    2138                 :            :         /*
    2139                 :            :          * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
    2140                 :            :          * the basis that some migrations will fail in ASYNC mode. However,
    2141                 :            :          * if the cached PFNs match and pageblocks are skipped due to having
    2142                 :            :          * no isolation candidates, then the sync state does not matter.
    2143                 :            :          * Until a pageblock with isolation candidates is found, keep the
    2144                 :            :          * cached PFNs in sync to avoid revisiting the same blocks.
    2145                 :            :          */
    2146                 :          0 :         update_cached = !sync &&
    2147                 :          0 :                 cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
    2148                 :            : 
    2149                 :          0 :         trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
    2150                 :            :                                 cc->free_pfn, end_pfn, sync);
    2151                 :            : 
    2152                 :          0 :         migrate_prep_local();
    2153                 :            : 
    2154                 :          0 :         while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
    2155                 :            :                 int err;
    2156                 :          0 :                 unsigned long start_pfn = cc->migrate_pfn;
    2157                 :            : 
    2158                 :            :                 /*
    2159                 :            :                  * Avoid multiple rescans which can happen if a page cannot be
    2160                 :            :                  * isolated (dirty/writeback in async mode) or if the migrated
    2161                 :            :                  * pages are being allocated before the pageblock is cleared.
    2162                 :            :                  * The first rescan will capture the entire pageblock for
    2163                 :            :                  * migration. If it fails, it'll be marked skip and scanning
    2164                 :            :                  * will proceed as normal.
    2165                 :            :                  */
    2166                 :          0 :                 cc->rescan = false;
    2167                 :          0 :                 if (pageblock_start_pfn(last_migrated_pfn) ==
    2168                 :            :                     pageblock_start_pfn(start_pfn)) {
    2169                 :          0 :                         cc->rescan = true;
    2170                 :            :                 }
    2171                 :            : 
    2172                 :          0 :                 switch (isolate_migratepages(cc)) {
    2173                 :            :                 case ISOLATE_ABORT:
    2174                 :            :                         ret = COMPACT_CONTENDED;
    2175                 :          0 :                         putback_movable_pages(&cc->migratepages);
    2176                 :          0 :                         cc->nr_migratepages = 0;
    2177                 :            :                         last_migrated_pfn = 0;
    2178                 :          0 :                         goto out;
    2179                 :            :                 case ISOLATE_NONE:
    2180                 :          0 :                         if (update_cached) {
    2181                 :          0 :                                 cc->zone->compact_cached_migrate_pfn[1] =
    2182                 :          0 :                                         cc->zone->compact_cached_migrate_pfn[0];
    2183                 :            :                         }
    2184                 :            : 
    2185                 :            :                         /*
    2186                 :            :                          * We haven't isolated and migrated anything, but
    2187                 :            :                          * there might still be unflushed migrations from
    2188                 :            :                          * the previous cc->order aligned block.
    2189                 :            :                          */
    2190                 :            :                         goto check_drain;
    2191                 :            :                 case ISOLATE_SUCCESS:
    2192                 :            :                         update_cached = false;
    2193                 :            :                         last_migrated_pfn = start_pfn;
    2194                 :            :                         ;
    2195                 :            :                 }
    2196                 :            : 
    2197                 :          0 :                 err = migrate_pages(&cc->migratepages, compaction_alloc,
    2198                 :            :                                 compaction_free, (unsigned long)cc, cc->mode,
    2199                 :            :                                 MR_COMPACTION);
    2200                 :            : 
    2201                 :          0 :                 trace_mm_compaction_migratepages(cc->nr_migratepages, err,
    2202                 :            :                                                         &cc->migratepages);
    2203                 :            : 
    2204                 :            :                 /* All pages were either migrated or will be released */
    2205                 :          0 :                 cc->nr_migratepages = 0;
    2206                 :          0 :                 if (err) {
    2207                 :          0 :                         putback_movable_pages(&cc->migratepages);
    2208                 :            :                         /*
    2209                 :            :                          * migrate_pages() may return -ENOMEM when scanners meet
    2210                 :            :                          * and we want compact_finished() to detect it
    2211                 :            :                          */
    2212                 :          0 :                         if (err == -ENOMEM && !compact_scanners_met(cc)) {
    2213                 :            :                                 ret = COMPACT_CONTENDED;
    2214                 :            :                                 goto out;
    2215                 :            :                         }
    2216                 :            :                         /*
    2217                 :            :                          * We failed to migrate at least one page in the current
    2218                 :            :                          * order-aligned block, so skip the rest of it.
    2219                 :            :                          */
    2220                 :          0 :                         if (cc->direct_compaction &&
    2221                 :          0 :                                                 (cc->mode == MIGRATE_ASYNC)) {
    2222                 :          0 :                                 cc->migrate_pfn = block_end_pfn(
    2223                 :            :                                                 cc->migrate_pfn - 1, cc->order);
    2224                 :            :                                 /* Draining pcplists is useless in this case */
    2225                 :            :                                 last_migrated_pfn = 0;
    2226                 :            :                         }
    2227                 :            :                 }
    2228                 :            : 
    2229                 :            : check_drain:
    2230                 :            :                 /*
    2231                 :            :                  * Has the migration scanner moved away from the previous
    2232                 :            :                  * cc->order aligned block where we migrated from? If yes,
    2233                 :            :                  * flush the pages that were freed, so that they can merge and
    2234                 :            :                  * compact_finished() can detect immediately if allocation
    2235                 :            :                  * would succeed.
    2236                 :            :                  */
    2237                 :          0 :                 if (cc->order > 0 && last_migrated_pfn) {
    2238                 :            :                         int cpu;
    2239                 :          0 :                         unsigned long current_block_start =
    2240                 :          0 :                                 block_start_pfn(cc->migrate_pfn, cc->order);
    2241                 :            : 
    2242                 :          0 :                         if (last_migrated_pfn < current_block_start) {
    2243                 :          0 :                                 cpu = get_cpu();
    2244                 :          0 :                                 lru_add_drain_cpu(cpu);
    2245                 :          0 :                                 drain_local_pages(cc->zone);
    2246                 :          0 :                                 put_cpu();
    2247                 :            :                                 /* No more flushing until we migrate again */
    2248                 :            :                                 last_migrated_pfn = 0;
    2249                 :            :                         }
    2250                 :            :                 }
    2251                 :            : 
    2252                 :            :                 /* Stop if a page has been captured */
    2253                 :          0 :                 if (capc && capc->page) {
    2254                 :            :                         ret = COMPACT_SUCCESS;
    2255                 :            :                         break;
    2256                 :            :                 }
    2257                 :            :         }
    2258                 :            : 
    2259                 :            : out:
    2260                 :            :         /*
    2261                 :            :          * Release free pages and update where the free scanner should restart,
    2262                 :            :          * so we don't leave any returned pages behind in the next attempt.
    2263                 :            :          */
    2264                 :          0 :         if (cc->nr_freepages > 0) {
    2265                 :          0 :                 unsigned long free_pfn = release_freepages(&cc->freepages);
    2266                 :            : 
    2267                 :          0 :                 cc->nr_freepages = 0;
    2268                 :            :                 VM_BUG_ON(free_pfn == 0);
    2269                 :            :                 /* The cached pfn is always the first in a pageblock */
    2270                 :          0 :                 free_pfn = pageblock_start_pfn(free_pfn);
    2271                 :            :                 /*
    2272                 :            :                  * Only go back (to a higher pfn), not forward. The cached pfn
    2273                 :            :                  * might already have been reset to zone end in compact_finished()
    2274                 :            :                  */
    2275                 :          0 :                 if (free_pfn > cc->zone->compact_cached_free_pfn)
    2276                 :          0 :                         cc->zone->compact_cached_free_pfn = free_pfn;
    2277                 :            :         }
    2278                 :            : 
    2279                 :          0 :         count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
    2280                 :          0 :         count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);
    2281                 :            : 
    2282                 :          0 :         trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
    2283                 :            :                                 cc->free_pfn, end_pfn, sync, ret);
    2284                 :            : 
    2285                 :          0 :         return ret;
    2286                 :            : }
    2287                 :            : 
    2288                 :          0 : static enum compact_result compact_zone_order(struct zone *zone, int order,
    2289                 :            :                 gfp_t gfp_mask, enum compact_priority prio,
    2290                 :            :                 unsigned int alloc_flags, int classzone_idx,
    2291                 :            :                 struct page **capture)
    2292                 :            : {
    2293                 :            :         enum compact_result ret;
    2294                 :          0 :         struct compact_control cc = {
    2295                 :            :                 .order = order,
    2296                 :            :                 .search_order = order,
    2297                 :            :                 .gfp_mask = gfp_mask,
    2298                 :            :                 .zone = zone,
    2299                 :            :                 .mode = (prio == COMPACT_PRIO_ASYNC) ?
    2300                 :          0 :                                         MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
    2301                 :            :                 .alloc_flags = alloc_flags,
    2302                 :            :                 .classzone_idx = classzone_idx,
    2303                 :            :                 .direct_compaction = true,
    2304                 :            :                 .whole_zone = (prio == MIN_COMPACT_PRIORITY),
    2305                 :          0 :                 .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
    2306                 :            :                 .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
    2307                 :            :         };
    2308                 :          0 :         struct capture_control capc = {
    2309                 :            :                 .cc = &cc,
    2310                 :            :                 .page = NULL,
    2311                 :            :         };
    2312                 :            : 
    2313                 :            :         /*
    2314                 :            :          * Make sure the structs are really initialized before we expose the
    2315                 :            :          * capture control, in case we are interrupted and the interrupt handler
    2316                 :            :          * frees a page.
    2317                 :            :          */
    2318                 :          0 :         barrier();
    2319                 :          0 :         WRITE_ONCE(current->capture_control, &capc);
    2320                 :            : 
    2321                 :          0 :         ret = compact_zone(&cc, &capc);
    2322                 :            : 
    2323                 :            :         VM_BUG_ON(!list_empty(&cc.freepages));
    2324                 :            :         VM_BUG_ON(!list_empty(&cc.migratepages));
    2325                 :            : 
    2326                 :            :         /*
    2327                 :            :          * Make sure we hide capture control first before we read the captured
    2328                 :            :          * page pointer, otherwise an interrupt could free and capture a page
    2329                 :            :          * and we would leak it.
    2330                 :            :          */
    2331                 :          0 :         WRITE_ONCE(current->capture_control, NULL);
    2332                 :          0 :         *capture = READ_ONCE(capc.page);
    2333                 :            : 
    2334                 :          0 :         return ret;
    2335                 :            : }
    2336                 :            : 
    2337                 :            : int sysctl_extfrag_threshold = 500;
    2338                 :            : 
    2339                 :            : /**
    2340                 :            :  * try_to_compact_pages - Direct compact to satisfy a high-order allocation
    2341                 :            :  * @gfp_mask: The GFP mask of the current allocation
    2342                 :            :  * @order: The order of the current allocation
    2343                 :            :  * @alloc_flags: The allocation flags of the current allocation
    2344                 :            :  * @ac: The context of current allocation
    2345                 :            :  * @prio: Determines how hard direct compaction should try to succeed
    2346                 :            :  * @capture: Pointer to free page created by compaction will be stored here
    2347                 :            :  *
    2348                 :            :  * This is the main entry point for direct page compaction.
    2349                 :            :  */
    2350                 :          0 : enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
    2351                 :            :                 unsigned int alloc_flags, const struct alloc_context *ac,
    2352                 :            :                 enum compact_priority prio, struct page **capture)
    2353                 :            : {
    2354                 :          0 :         int may_perform_io = gfp_mask & __GFP_IO;
    2355                 :            :         struct zoneref *z;
    2356                 :            :         struct zone *zone;
    2357                 :            :         enum compact_result rc = COMPACT_SKIPPED;
    2358                 :            : 
    2359                 :            :         /*
    2360                 :            :          * Check if the GFP flags allow compaction - GFP_NOIO is really
    2361                 :            :          * tricky context because the migration might require IO
    2362                 :            :          */
    2363                 :          0 :         if (!may_perform_io)
    2364                 :            :                 return COMPACT_SKIPPED;
    2365                 :            : 
    2366                 :          0 :         trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
    2367                 :            : 
    2368                 :            :         /* Compact each zone in the list */
    2369                 :          0 :         for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
    2370                 :            :                                                                 ac->nodemask) {
    2371                 :            :                 enum compact_result status;
    2372                 :            : 
    2373                 :          0 :                 if (prio > MIN_COMPACT_PRIORITY
    2374                 :          0 :                                         && compaction_deferred(zone, order)) {
    2375                 :          0 :                         rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
    2376                 :          0 :                         continue;
    2377                 :            :                 }
    2378                 :            : 
    2379                 :          0 :                 status = compact_zone_order(zone, order, gfp_mask, prio,
    2380                 :            :                                 alloc_flags, ac_classzone_idx(ac), capture);
    2381                 :          0 :                 rc = max(status, rc);
    2382                 :            : 
    2383                 :            :                 /* The allocation should succeed, stop compacting */
    2384                 :          0 :                 if (status == COMPACT_SUCCESS) {
    2385                 :            :                         /*
    2386                 :            :                          * We think the allocation will succeed in this zone,
    2387                 :            :                          * but it is not certain, hence the false. The caller
    2388                 :            :                          * will repeat this with true if allocation indeed
    2389                 :            :                          * succeeds in this zone.
    2390                 :            :                          */
    2391                 :          0 :                         compaction_defer_reset(zone, order, false);
    2392                 :            : 
    2393                 :          0 :                         break;
    2394                 :            :                 }
    2395                 :            : 
    2396                 :          0 :                 if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
    2397                 :            :                                         status == COMPACT_PARTIAL_SKIPPED))
    2398                 :            :                         /*
    2399                 :            :                          * We think that allocation won't succeed in this zone
    2400                 :            :                          * so we defer compaction there. If it ends up
    2401                 :            :                          * succeeding after all, it will be reset.
    2402                 :            :                          */
    2403                 :          0 :                         defer_compaction(zone, order);
    2404                 :            : 
    2405                 :            :                 /*
    2406                 :            :                  * We might have stopped compacting due to need_resched() in
    2407                 :            :                  * async compaction, or due to a fatal signal detected. In that
    2408                 :            :                  * case do not try further zones
    2409                 :            :                  */
    2410                 :          0 :                 if ((prio == COMPACT_PRIO_ASYNC && need_resched())
    2411                 :          0 :                                         || fatal_signal_pending(current))
    2412                 :            :                         break;
    2413                 :            :         }
    2414                 :            : 
    2415                 :          0 :         return rc;
    2416                 :            : }
    2417                 :            : 
    2418                 :            : 
    2419                 :            : /* Compact all zones within a node */
    2420                 :          0 : static void compact_node(int nid)
    2421                 :            : {
    2422                 :            :         pg_data_t *pgdat = NODE_DATA(nid);
    2423                 :            :         int zoneid;
    2424                 :            :         struct zone *zone;
    2425                 :          0 :         struct compact_control cc = {
    2426                 :            :                 .order = -1,
    2427                 :            :                 .mode = MIGRATE_SYNC,
    2428                 :            :                 .ignore_skip_hint = true,
    2429                 :            :                 .whole_zone = true,
    2430                 :            :                 .gfp_mask = GFP_KERNEL,
    2431                 :            :         };
    2432                 :            : 
    2433                 :            : 
    2434                 :          0 :         for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
    2435                 :            : 
    2436                 :          0 :                 zone = &pgdat->node_zones[zoneid];
    2437                 :          0 :                 if (!populated_zone(zone))
    2438                 :          0 :                         continue;
    2439                 :            : 
    2440                 :          0 :                 cc.zone = zone;
    2441                 :            : 
    2442                 :          0 :                 compact_zone(&cc, NULL);
    2443                 :            : 
    2444                 :            :                 VM_BUG_ON(!list_empty(&cc.freepages));
    2445                 :            :                 VM_BUG_ON(!list_empty(&cc.migratepages));
    2446                 :            :         }
    2447                 :          0 : }
    2448                 :            : 
    2449                 :            : /* Compact all nodes in the system */
    2450                 :          0 : static void compact_nodes(void)
    2451                 :            : {
    2452                 :            :         int nid;
    2453                 :            : 
    2454                 :            :         /* Flush pending updates to the LRU lists */
    2455                 :          0 :         lru_add_drain_all();
    2456                 :            : 
    2457                 :          0 :         for_each_online_node(nid)
    2458                 :          0 :                 compact_node(nid);
    2459                 :          0 : }
    2460                 :            : 
    2461                 :            : /* The written value is actually unused, all memory is compacted */
    2462                 :            : int sysctl_compact_memory;
    2463                 :            : 
    2464                 :            : /*
    2465                 :            :  * This is the entry point for compacting all nodes via
    2466                 :            :  * /proc/sys/vm/compact_memory
    2467                 :            :  */
    2468                 :          0 : int sysctl_compaction_handler(struct ctl_table *table, int write,
    2469                 :            :                         void __user *buffer, size_t *length, loff_t *ppos)
    2470                 :            : {
    2471                 :          0 :         if (write)
    2472                 :          0 :                 compact_nodes();
    2473                 :            : 
    2474                 :          0 :         return 0;
    2475                 :            : }
    2476                 :            : 
    2477                 :            : #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
    2478                 :            : static ssize_t sysfs_compact_node(struct device *dev,
    2479                 :            :                         struct device_attribute *attr,
    2480                 :            :                         const char *buf, size_t count)
    2481                 :            : {
    2482                 :            :         int nid = dev->id;
    2483                 :            : 
    2484                 :            :         if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
    2485                 :            :                 /* Flush pending updates to the LRU lists */
    2486                 :            :                 lru_add_drain_all();
    2487                 :            : 
    2488                 :            :                 compact_node(nid);
    2489                 :            :         }
    2490                 :            : 
    2491                 :            :         return count;
    2492                 :            : }
    2493                 :            : static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
    2494                 :            : 
    2495                 :            : int compaction_register_node(struct node *node)
    2496                 :            : {
    2497                 :            :         return device_create_file(&node->dev, &dev_attr_compact);
    2498                 :            : }
    2499                 :            : 
    2500                 :            : void compaction_unregister_node(struct node *node)
    2501                 :            : {
    2502                 :            :         return device_remove_file(&node->dev, &dev_attr_compact);
    2503                 :            : }
    2504                 :            : #endif /* CONFIG_SYSFS && CONFIG_NUMA */
    2505                 :            : 
    2506                 :            : static inline bool kcompactd_work_requested(pg_data_t *pgdat)
    2507                 :            : {
    2508                 :          3 :         return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
    2509                 :            : }
    2510                 :            : 
    2511                 :          0 : static bool kcompactd_node_suitable(pg_data_t *pgdat)
    2512                 :            : {
    2513                 :            :         int zoneid;
    2514                 :            :         struct zone *zone;
    2515                 :          0 :         enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
    2516                 :            : 
    2517                 :          0 :         for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
    2518                 :          0 :                 zone = &pgdat->node_zones[zoneid];
    2519                 :            : 
    2520                 :          0 :                 if (!populated_zone(zone))
    2521                 :          0 :                         continue;
    2522                 :            : 
    2523                 :          0 :                 if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
    2524                 :            :                                         classzone_idx) == COMPACT_CONTINUE)
    2525                 :            :                         return true;
    2526                 :            :         }
    2527                 :            : 
    2528                 :            :         return false;
    2529                 :            : }
    2530                 :            : 
    2531                 :          0 : static void kcompactd_do_work(pg_data_t *pgdat)
    2532                 :            : {
    2533                 :            :         /*
    2534                 :            :          * With no special task, compact all zones so that a page of requested
    2535                 :            :          * order is allocatable.
    2536                 :            :          */
    2537                 :            :         int zoneid;
    2538                 :            :         struct zone *zone;
    2539                 :          0 :         struct compact_control cc = {
    2540                 :            :                 .order = pgdat->kcompactd_max_order,
    2541                 :          0 :                 .search_order = pgdat->kcompactd_max_order,
    2542                 :          0 :                 .classzone_idx = pgdat->kcompactd_classzone_idx,
    2543                 :            :                 .mode = MIGRATE_SYNC_LIGHT,
    2544                 :            :                 .ignore_skip_hint = false,
    2545                 :            :                 .gfp_mask = GFP_KERNEL,
    2546                 :            :         };
    2547                 :          0 :         trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
    2548                 :            :                                                         cc.classzone_idx);
    2549                 :            :         count_compact_event(KCOMPACTD_WAKE);
    2550                 :            : 
    2551                 :          0 :         for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
    2552                 :            :                 int status;
    2553                 :            : 
    2554                 :          0 :                 zone = &pgdat->node_zones[zoneid];
    2555                 :          0 :                 if (!populated_zone(zone))
    2556                 :          0 :                         continue;
    2557                 :            : 
    2558                 :          0 :                 if (compaction_deferred(zone, cc.order))
    2559                 :          0 :                         continue;
    2560                 :            : 
    2561                 :          0 :                 if (compaction_suitable(zone, cc.order, 0, zoneid) !=
    2562                 :            :                                                         COMPACT_CONTINUE)
    2563                 :          0 :                         continue;
    2564                 :            : 
    2565                 :          0 :                 if (kthread_should_stop())
    2566                 :          0 :                         return;
    2567                 :            : 
    2568                 :          0 :                 cc.zone = zone;
    2569                 :          0 :                 status = compact_zone(&cc, NULL);
    2570                 :            : 
    2571                 :          0 :                 if (status == COMPACT_SUCCESS) {
    2572                 :          0 :                         compaction_defer_reset(zone, cc.order, false);
    2573                 :          0 :                 } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
    2574                 :            :                         /*
    2575                 :            :                          * Buddy pages may become stranded on pcps that could
    2576                 :            :                          * otherwise coalesce on the zone's free area for
    2577                 :            :                          * order >= cc.order.  This is ratelimited by the
    2578                 :            :                          * upcoming deferral.
    2579                 :            :                          */
    2580                 :          0 :                         drain_all_pages(zone);
    2581                 :            : 
    2582                 :            :                         /*
    2583                 :            :                          * We use sync migration mode here, so we defer like
    2584                 :            :                          * sync direct compaction does.
    2585                 :            :                          */
    2586                 :          0 :                         defer_compaction(zone, cc.order);
    2587                 :            :                 }
    2588                 :            : 
    2589                 :            :                 count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
    2590                 :          0 :                                      cc.total_migrate_scanned);
    2591                 :            :                 count_compact_events(KCOMPACTD_FREE_SCANNED,
    2592                 :          0 :                                      cc.total_free_scanned);
    2593                 :            : 
    2594                 :            :                 VM_BUG_ON(!list_empty(&cc.freepages));
    2595                 :            :                 VM_BUG_ON(!list_empty(&cc.migratepages));
    2596                 :            :         }
    2597                 :            : 
    2598                 :            :         /*
    2599                 :            :          * Regardless of success, we are done until woken up next. But remember
    2600                 :            :          * the requested order/classzone_idx in case it was higher/tighter than
    2601                 :            :          * our current ones
    2602                 :            :          */
    2603                 :          0 :         if (pgdat->kcompactd_max_order <= cc.order)
    2604                 :          0 :                 pgdat->kcompactd_max_order = 0;
    2605                 :          0 :         if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
    2606                 :          0 :                 pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
    2607                 :            : }
    2608                 :            : 
    2609                 :          3 : void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
    2610                 :            : {
    2611                 :          3 :         if (!order)
    2612                 :            :                 return;
    2613                 :            : 
    2614                 :          0 :         if (pgdat->kcompactd_max_order < order)
    2615                 :          0 :                 pgdat->kcompactd_max_order = order;
    2616                 :            : 
    2617                 :          0 :         if (pgdat->kcompactd_classzone_idx > classzone_idx)
    2618                 :          0 :                 pgdat->kcompactd_classzone_idx = classzone_idx;
    2619                 :            : 
    2620                 :            :         /*
    2621                 :            :          * Pairs with implicit barrier in wait_event_freezable()
    2622                 :            :          * such that wakeups are not missed.
    2623                 :            :          */
    2624                 :          0 :         if (!wq_has_sleeper(&pgdat->kcompactd_wait))
    2625                 :            :                 return;
    2626                 :            : 
    2627                 :          0 :         if (!kcompactd_node_suitable(pgdat))
    2628                 :            :                 return;
    2629                 :            : 
    2630                 :          0 :         trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
    2631                 :            :                                                         classzone_idx);
    2632                 :          0 :         wake_up_interruptible(&pgdat->kcompactd_wait);
    2633                 :            : }
    2634                 :            : 
    2635                 :            : /*
    2636                 :            :  * The background compaction daemon, started as a kernel thread
    2637                 :            :  * from the init process.
    2638                 :            :  */
    2639                 :          3 : static int kcompactd(void *p)
    2640                 :            : {
    2641                 :            :         pg_data_t *pgdat = (pg_data_t*)p;
    2642                 :          3 :         struct task_struct *tsk = current;
    2643                 :            : 
    2644                 :            :         const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
    2645                 :            : 
    2646                 :          3 :         if (!cpumask_empty(cpumask))
    2647                 :          3 :                 set_cpus_allowed_ptr(tsk, cpumask);
    2648                 :            : 
    2649                 :          3 :         set_freezable();
    2650                 :            : 
    2651                 :          3 :         pgdat->kcompactd_max_order = 0;
    2652                 :          3 :         pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
    2653                 :            : 
    2654                 :          3 :         while (!kthread_should_stop()) {
    2655                 :            :                 unsigned long pflags;
    2656                 :            : 
    2657                 :          3 :                 trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
    2658                 :          3 :                 wait_event_freezable(pgdat->kcompactd_wait,
    2659                 :            :                                 kcompactd_work_requested(pgdat));
    2660                 :            : 
    2661                 :            :                 psi_memstall_enter(&pflags);
    2662                 :          0 :                 kcompactd_do_work(pgdat);
    2663                 :            :                 psi_memstall_leave(&pflags);
    2664                 :            :         }
    2665                 :            : 
    2666                 :          0 :         return 0;
    2667                 :            : }
    2668                 :            : 
    2669                 :            : /*
    2670                 :            :  * This kcompactd start function will be called by init and node-hot-add.
    2671                 :            :  * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added.
    2672                 :            :  */
    2673                 :          3 : int kcompactd_run(int nid)
    2674                 :            : {
    2675                 :            :         pg_data_t *pgdat = NODE_DATA(nid);
    2676                 :            :         int ret = 0;
    2677                 :            : 
    2678                 :          3 :         if (pgdat->kcompactd)
    2679                 :            :                 return 0;
    2680                 :            : 
    2681                 :          3 :         pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
    2682                 :          3 :         if (IS_ERR(pgdat->kcompactd)) {
    2683                 :          0 :                 pr_err("Failed to start kcompactd on node %d\n", nid);
    2684                 :          0 :                 ret = PTR_ERR(pgdat->kcompactd);
    2685                 :          0 :                 pgdat->kcompactd = NULL;
    2686                 :            :         }
    2687                 :          3 :         return ret;
    2688                 :            : }
    2689                 :            : 
    2690                 :            : /*
    2691                 :            :  * Called by memory hotplug when all memory in a node is offlined. Caller must
    2692                 :            :  * hold mem_hotplug_begin/end().
    2693                 :            :  */
    2694                 :          0 : void kcompactd_stop(int nid)
    2695                 :            : {
    2696                 :          0 :         struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
    2697                 :            : 
    2698                 :          0 :         if (kcompactd) {
    2699                 :          0 :                 kthread_stop(kcompactd);
    2700                 :          0 :                 NODE_DATA(nid)->kcompactd = NULL;
    2701                 :            :         }
    2702                 :          0 : }
    2703                 :            : 
    2704                 :            : /*
    2705                 :            :  * It's optimal to keep kcompactd on the same CPUs as their memory, but
    2706                 :            :  * not required for correctness. So if the last cpu in a node goes
    2707                 :            :  * away, we get changed to run anywhere: as the first one comes back,
    2708                 :            :  * restore their cpu bindings.
    2709                 :            :  */
    2710                 :          0 : static int kcompactd_cpu_online(unsigned int cpu)
    2711                 :            : {
    2712                 :            :         int nid;
    2713                 :            : 
    2714                 :          0 :         for_each_node_state(nid, N_MEMORY) {
    2715                 :            :                 pg_data_t *pgdat = NODE_DATA(nid);
    2716                 :            :                 const struct cpumask *mask;
    2717                 :            : 
    2718                 :            :                 mask = cpumask_of_node(pgdat->node_id);
    2719                 :            : 
    2720                 :          0 :                 if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
    2721                 :            :                         /* One of our CPUs online: restore mask */
    2722                 :          0 :                         set_cpus_allowed_ptr(pgdat->kcompactd, mask);
    2723                 :            :         }
    2724                 :          0 :         return 0;
    2725                 :            : }
    2726                 :            : 
    2727                 :          3 : static int __init kcompactd_init(void)
    2728                 :            : {
    2729                 :            :         int nid;
    2730                 :            :         int ret;
    2731                 :            : 
    2732                 :            :         ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
    2733                 :            :                                         "mm/compaction:online",
    2734                 :            :                                         kcompactd_cpu_online, NULL);
    2735                 :          3 :         if (ret < 0) {
    2736                 :          0 :                 pr_err("kcompactd: failed to register hotplug callbacks.\n");
    2737                 :          0 :                 return ret;
    2738                 :            :         }
    2739                 :            : 
    2740                 :          3 :         for_each_node_state(nid, N_MEMORY)
    2741                 :          3 :                 kcompactd_run(nid);
    2742                 :            :         return 0;
    2743                 :            : }
    2744                 :            : subsys_initcall(kcompactd_init)
    2745                 :            : 
    2746                 :            : #endif /* CONFIG_COMPACTION */

Generated by: LCOV version 1.14