LCOV - code coverage report
Current view:  top level - kernel - workqueue.c (source / functions)
Test:          Real
Date:          2020-10-17 15:46:43
Legend:        Neither, QEMU, Real, Both

               Hit     Total    Coverage
Lines:         946     1452     65.2 %
Functions:     6       136      4.4 %
Branches:      0       0        -

           Branch data     Line data    Source code
       1                 :            : // SPDX-License-Identifier: GPL-2.0-only
       2                 :            : /*
       3                 :            :  * kernel/workqueue.c - generic async execution with shared worker pool
       4                 :            :  *
       5                 :            :  * Copyright (C) 2002           Ingo Molnar
       6                 :            :  *
       7                 :            :  *   Derived from the taskqueue/keventd code by:
       8                 :            :  *     David Woodhouse <dwmw2@infradead.org>
       9                 :            :  *     Andrew Morton
      10                 :            :  *     Kai Petzke <wpp@marie.physik.tu-berlin.de>
      11                 :            :  *     Theodore Ts'o <tytso@mit.edu>
      12                 :            :  *
      13                 :            :  * Made to use alloc_percpu by Christoph Lameter.
      14                 :            :  *
      15                 :            :  * Copyright (C) 2010           SUSE Linux Products GmbH
      16                 :            :  * Copyright (C) 2010           Tejun Heo <tj@kernel.org>
      17                 :            :  *
       18                 :            :  * This is the generic async execution mechanism.  Work items are
      19                 :            :  * executed in process context.  The worker pool is shared and
      20                 :            :  * automatically managed.  There are two worker pools for each CPU (one for
      21                 :            :  * normal work items and the other for high priority ones) and some extra
      22                 :            :  * pools for workqueues which are not bound to any specific CPU - the
      23                 :            :  * number of these backing pools is dynamic.
      24                 :            :  *
      25                 :            :  * Please read Documentation/core-api/workqueue.rst for details.
      26                 :            :  */
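
/*
 * A minimal usage sketch of the public API implemented below (illustrative
 * only; my_data, my_work_fn and my_init are hypothetical names).  A user
 * embeds a work_struct, initializes it with INIT_WORK(), queues it with
 * schedule_work() (which targets system_wq), and can wait for completion
 * with flush_work():
 *
 *	struct my_data {
 *		struct work_struct work;
 *		int payload;
 *	};
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		struct my_data *d = container_of(work, struct my_data, work);
 *
 *		pr_info("processing payload %d\n", d->payload);
 *	}
 *
 *	static int my_init(void)
 *	{
 *		static struct my_data d = { .payload = 42 };
 *
 *		INIT_WORK(&d.work, my_work_fn);
 *		schedule_work(&d.work);
 *		flush_work(&d.work);
 *		return 0;
 *	}
 */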
      27                 :            : 
      28                 :            : #include <linux/export.h>
      29                 :            : #include <linux/kernel.h>
      30                 :            : #include <linux/sched.h>
      31                 :            : #include <linux/init.h>
      32                 :            : #include <linux/signal.h>
      33                 :            : #include <linux/completion.h>
      34                 :            : #include <linux/workqueue.h>
      35                 :            : #include <linux/slab.h>
      36                 :            : #include <linux/cpu.h>
      37                 :            : #include <linux/notifier.h>
      38                 :            : #include <linux/kthread.h>
      39                 :            : #include <linux/hardirq.h>
      40                 :            : #include <linux/mempolicy.h>
      41                 :            : #include <linux/freezer.h>
      42                 :            : #include <linux/debug_locks.h>
      43                 :            : #include <linux/lockdep.h>
      44                 :            : #include <linux/idr.h>
      45                 :            : #include <linux/jhash.h>
      46                 :            : #include <linux/hashtable.h>
      47                 :            : #include <linux/rculist.h>
      48                 :            : #include <linux/nodemask.h>
      49                 :            : #include <linux/moduleparam.h>
      50                 :            : #include <linux/uaccess.h>
      51                 :            : #include <linux/sched/isolation.h>
      52                 :            : #include <linux/nmi.h>
      53                 :            : 
      54                 :            : #include "workqueue_internal.h"
      55                 :            : 
      56                 :            : enum {
      57                 :            :         /*
      58                 :            :          * worker_pool flags
      59                 :            :          *
      60                 :            :          * A bound pool is either associated or disassociated with its CPU.
      61                 :            :          * While associated (!DISASSOCIATED), all workers are bound to the
      62                 :            :          * CPU and none has %WORKER_UNBOUND set and concurrency management
      63                 :            :          * is in effect.
      64                 :            :          *
      65                 :            :          * While DISASSOCIATED, the cpu may be offline and all workers have
      66                 :            :          * %WORKER_UNBOUND set and concurrency management disabled, and may
      67                 :            :          * be executing on any CPU.  The pool behaves as an unbound one.
      68                 :            :          *
      69                 :            :          * Note that DISASSOCIATED should be flipped only while holding
      70                 :            :          * wq_pool_attach_mutex to avoid changing binding state while
      71                 :            :          * worker_attach_to_pool() is in progress.
      72                 :            :          */
      73                 :            :         POOL_MANAGER_ACTIVE     = 1 << 0, /* being managed */
      74                 :            :         POOL_DISASSOCIATED      = 1 << 2, /* cpu can't serve workers */
      75                 :            : 
      76                 :            :         /* worker flags */
      77                 :            :         WORKER_DIE              = 1 << 1, /* die die die */
      78                 :            :         WORKER_IDLE             = 1 << 2, /* is idle */
      79                 :            :         WORKER_PREP             = 1 << 3, /* preparing to run works */
      80                 :            :         WORKER_CPU_INTENSIVE    = 1 << 6, /* cpu intensive */
      81                 :            :         WORKER_UNBOUND          = 1 << 7, /* worker is unbound */
      82                 :            :         WORKER_REBOUND          = 1 << 8, /* worker was rebound */
      83                 :            : 
      84                 :            :         WORKER_NOT_RUNNING      = WORKER_PREP | WORKER_CPU_INTENSIVE |
      85                 :            :                                   WORKER_UNBOUND | WORKER_REBOUND,
      86                 :            : 
      87                 :            :         NR_STD_WORKER_POOLS     = 2,            /* # standard pools per cpu */
      88                 :            : 
      89                 :            :         UNBOUND_POOL_HASH_ORDER = 6,            /* hashed by pool->attrs */
      90                 :            :         BUSY_WORKER_HASH_ORDER  = 6,            /* 64 pointers */
      91                 :            : 
      92                 :            :         MAX_IDLE_WORKERS_RATIO  = 4,            /* 1/4 of busy can be idle */
      93                 :            :         IDLE_WORKER_TIMEOUT     = 300 * HZ,     /* keep idle ones for 5 mins */
      94                 :            : 
      95                 :            :         MAYDAY_INITIAL_TIMEOUT  = HZ / 100 >= 2 ? HZ / 100 : 2,
      96                 :            :                                                 /* call for help after 10ms
      97                 :            :                                                    (min two ticks) */
      98                 :            :         MAYDAY_INTERVAL         = HZ / 10,      /* and then every 100ms */
       99                 :            :         CREATE_COOLDOWN         = HZ,           /* time to breathe after fail */
     100                 :            : 
     101                 :            :         /*
     102                 :            :          * Rescue workers are used only on emergencies and shared by
     103                 :            :          * all cpus.  Give MIN_NICE.
     104                 :            :          */
     105                 :            :         RESCUER_NICE_LEVEL      = MIN_NICE,
     106                 :            :         HIGHPRI_NICE_LEVEL      = MIN_NICE,
     107                 :            : 
     108                 :            :         WQ_NAME_LEN             = 24,
     109                 :            : };
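
/*
 * Worked example for MAYDAY_INITIAL_TIMEOUT above (illustrative, not
 * normative): with CONFIG_HZ=1000, HZ / 100 = 10 ticks = 10ms; with
 * CONFIG_HZ=250, HZ / 100 = 2 ticks = 8ms, which still meets the two-tick
 * minimum; with CONFIG_HZ=100, HZ / 100 = 1 tick, so the expression clamps
 * to the 2-tick minimum, i.e. 20ms.
 */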
     110                 :            : 
     111                 :            : /*
     112                 :            :  * Structure fields follow one of the following exclusion rules.
     113                 :            :  *
     114                 :            :  * I: Modifiable by initialization/destruction paths and read-only for
     115                 :            :  *    everyone else.
     116                 :            :  *
     117                 :            :  * P: Preemption protected.  Disabling preemption is enough and should
     118                 :            :  *    only be modified and accessed from the local cpu.
     119                 :            :  *
     120                 :            :  * L: pool->lock protected.  Access with pool->lock held.
     121                 :            :  *
     122                 :            :  * X: During normal operation, modification requires pool->lock and should
     123                 :            :  *    be done only from local cpu.  Either disabling preemption on local
     124                 :            :  *    cpu or grabbing pool->lock is enough for read access.  If
     125                 :            :  *    POOL_DISASSOCIATED is set, it's identical to L.
     126                 :            :  *
     127                 :            :  * A: wq_pool_attach_mutex protected.
     128                 :            :  *
     129                 :            :  * PL: wq_pool_mutex protected.
     130                 :            :  *
     131                 :            :  * PR: wq_pool_mutex protected for writes.  RCU protected for reads.
     132                 :            :  *
     133                 :            :  * PW: wq_pool_mutex and wq->mutex protected for writes.  Either for reads.
     134                 :            :  *
     135                 :            :  * PWR: wq_pool_mutex and wq->mutex protected for writes.  Either or
     136                 :            :  *      RCU for reads.
     137                 :            :  *
     138                 :            :  * WQ: wq->mutex protected.
     139                 :            :  *
     140                 :            :  * WR: wq->mutex protected for writes.  RCU protected for reads.
     141                 :            :  *
     142                 :            :  * MD: wq_mayday_lock protected.
     143                 :            :  */
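
/*
 * Illustrative example of the annotations above (hypothetical caller, not
 * part of this file's logic): an "L:" field such as pool->nr_workers may
 * only be modified with pool->lock held,
 *
 *	spin_lock_irq(&pool->lock);
 *	pool->nr_workers++;
 *	spin_unlock_irq(&pool->lock);
 *
 * while an "A:" field such as pool->workers is walked under
 * wq_pool_attach_mutex instead.
 */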
     144                 :            : 
     145                 :            : /* struct worker is defined in workqueue_internal.h */
     146                 :            : 
     147                 :            : struct worker_pool {
     148                 :            :         spinlock_t              lock;           /* the pool lock */
     149                 :            :         int                     cpu;            /* I: the associated cpu */
     150                 :            :         int                     node;           /* I: the associated node ID */
     151                 :            :         int                     id;             /* I: pool ID */
     152                 :            :         unsigned int            flags;          /* X: flags */
     153                 :            : 
     154                 :            :         unsigned long           watchdog_ts;    /* L: watchdog timestamp */
     155                 :            : 
     156                 :            :         struct list_head        worklist;       /* L: list of pending works */
     157                 :            : 
     158                 :            :         int                     nr_workers;     /* L: total number of workers */
     159                 :            :         int                     nr_idle;        /* L: currently idle workers */
     160                 :            : 
     161                 :            :         struct list_head        idle_list;      /* X: list of idle workers */
     162                 :            :         struct timer_list       idle_timer;     /* L: worker idle timeout */
     163                 :            :         struct timer_list       mayday_timer;   /* L: SOS timer for workers */
     164                 :            : 
      165                 :            :         /* a worker is either on busy_hash or idle_list, or the manager */
     166                 :            :         DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
     167                 :            :                                                 /* L: hash of busy workers */
     168                 :            : 
     169                 :            :         struct worker           *manager;       /* L: purely informational */
     170                 :            :         struct list_head        workers;        /* A: attached workers */
     171                 :            :         struct completion       *detach_completion; /* all workers detached */
     172                 :            : 
     173                 :            :         struct ida              worker_ida;     /* worker IDs for task name */
     174                 :            : 
     175                 :            :         struct workqueue_attrs  *attrs;         /* I: worker attributes */
     176                 :            :         struct hlist_node       hash_node;      /* PL: unbound_pool_hash node */
     177                 :            :         int                     refcnt;         /* PL: refcnt for unbound pools */
     178                 :            : 
     179                 :            :         /*
     180                 :            :          * The current concurrency level.  As it's likely to be accessed
     181                 :            :          * from other CPUs during try_to_wake_up(), put it in a separate
     182                 :            :          * cacheline.
     183                 :            :          */
     184                 :            :         atomic_t                nr_running ____cacheline_aligned_in_smp;
     185                 :            : 
     186                 :            :         /*
     187                 :            :          * Destruction of pool is RCU protected to allow dereferences
     188                 :            :          * from get_work_pool().
     189                 :            :          */
     190                 :            :         struct rcu_head         rcu;
     191                 :            : } ____cacheline_aligned_in_smp;
     192                 :            : 
     193                 :            : /*
     194                 :            :  * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
     195                 :            :  * of work_struct->data are used for flags and the remaining high bits
      196                 :            :  * point to the pwq; thus, pwqs need to be aligned on a boundary of at
      197                 :            :  * least 1 << WORK_STRUCT_FLAG_BITS.
     198                 :            :  */
     199                 :            : struct pool_workqueue {
     200                 :            :         struct worker_pool      *pool;          /* I: the associated pool */
     201                 :            :         struct workqueue_struct *wq;            /* I: the owning workqueue */
     202                 :            :         int                     work_color;     /* L: current color */
     203                 :            :         int                     flush_color;    /* L: flushing color */
     204                 :            :         int                     refcnt;         /* L: reference count */
     205                 :            :         int                     nr_in_flight[WORK_NR_COLORS];
     206                 :            :                                                 /* L: nr of in_flight works */
     207                 :            :         int                     nr_active;      /* L: nr of active works */
     208                 :            :         int                     max_active;     /* L: max active works */
     209                 :            :         struct list_head        delayed_works;  /* L: delayed works */
     210                 :            :         struct list_head        pwqs_node;      /* WR: node on wq->pwqs */
     211                 :            :         struct list_head        mayday_node;    /* MD: node on wq->maydays */
     212                 :            : 
     213                 :            :         /*
     214                 :            :          * Release of unbound pwq is punted to system_wq.  See put_pwq()
     215                 :            :          * and pwq_unbound_release_workfn() for details.  pool_workqueue
     216                 :            :          * itself is also RCU protected so that the first pwq can be
     217                 :            :          * determined without grabbing wq->mutex.
     218                 :            :          */
     219                 :            :         struct work_struct      unbound_release_work;
     220                 :            :         struct rcu_head         rcu;
     221                 :            : } __aligned(1 << WORK_STRUCT_FLAG_BITS);
     222                 :            : 
     223                 :            : /*
     224                 :            :  * Structure used to wait for workqueue flush.
     225                 :            :  */
     226                 :            : struct wq_flusher {
     227                 :            :         struct list_head        list;           /* WQ: list of flushers */
     228                 :            :         int                     flush_color;    /* WQ: flush color waiting for */
     229                 :            :         struct completion       done;           /* flush completion */
     230                 :            : };
     231                 :            : 
     232                 :            : struct wq_device;
     233                 :            : 
     234                 :            : /*
     235                 :            :  * The externally visible workqueue.  It relays the issued work items to
     236                 :            :  * the appropriate worker_pool through its pool_workqueues.
     237                 :            :  */
     238                 :            : struct workqueue_struct {
     239                 :            :         struct list_head        pwqs;           /* WR: all pwqs of this wq */
     240                 :            :         struct list_head        list;           /* PR: list of all workqueues */
     241                 :            : 
     242                 :            :         struct mutex            mutex;          /* protects this wq */
     243                 :            :         int                     work_color;     /* WQ: current work color */
     244                 :            :         int                     flush_color;    /* WQ: current flush color */
     245                 :            :         atomic_t                nr_pwqs_to_flush; /* flush in progress */
     246                 :            :         struct wq_flusher       *first_flusher; /* WQ: first flusher */
     247                 :            :         struct list_head        flusher_queue;  /* WQ: flush waiters */
     248                 :            :         struct list_head        flusher_overflow; /* WQ: flush overflow list */
     249                 :            : 
     250                 :            :         struct list_head        maydays;        /* MD: pwqs requesting rescue */
     251                 :            :         struct worker           *rescuer;       /* I: rescue worker */
     252                 :            : 
     253                 :            :         int                     nr_drainers;    /* WQ: drain in progress */
     254                 :            :         int                     saved_max_active; /* WQ: saved pwq max_active */
     255                 :            : 
     256                 :            :         struct workqueue_attrs  *unbound_attrs; /* PW: only for unbound wqs */
     257                 :            :         struct pool_workqueue   *dfl_pwq;       /* PW: only for unbound wqs */
     258                 :            : 
     259                 :            : #ifdef CONFIG_SYSFS
     260                 :            :         struct wq_device        *wq_dev;        /* I: for sysfs interface */
     261                 :            : #endif
     262                 :            : #ifdef CONFIG_LOCKDEP
     263                 :            :         char                    *lock_name;
     264                 :            :         struct lock_class_key   key;
     265                 :            :         struct lockdep_map      lockdep_map;
     266                 :            : #endif
     267                 :            :         char                    name[WQ_NAME_LEN]; /* I: workqueue name */
     268                 :            : 
     269                 :            :         /*
     270                 :            :          * Destruction of workqueue_struct is RCU protected to allow walking
     271                 :            :          * the workqueues list without grabbing wq_pool_mutex.
     272                 :            :          * This is used to dump all workqueues from sysrq.
     273                 :            :          */
     274                 :            :         struct rcu_head         rcu;
     275                 :            : 
     276                 :            :         /* hot fields used during command issue, aligned to cacheline */
     277                 :            :         unsigned int            flags ____cacheline_aligned; /* WQ: WQ_* flags */
     278                 :            :         struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
     279                 :            :         struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
     280                 :            : };
     281                 :            : 
     282                 :            : static struct kmem_cache *pwq_cache;
     283                 :            : 
     284                 :            : static cpumask_var_t *wq_numa_possible_cpumask;
     285                 :            :                                         /* possible CPUs of each node */
     286                 :            : 
     287                 :            : static bool wq_disable_numa;
     288                 :            : module_param_named(disable_numa, wq_disable_numa, bool, 0444);
     289                 :            : 
     290                 :            : /* see the comment above the definition of WQ_POWER_EFFICIENT */
     291                 :            : static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
     292                 :            : module_param_named(power_efficient, wq_power_efficient, bool, 0444);
     293                 :            : 
     294                 :            : static bool wq_online;                  /* can kworkers be created yet? */
     295                 :            : 
     296                 :            : static bool wq_numa_enabled;            /* unbound NUMA affinity enabled */
     297                 :            : 
     298                 :            : /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
     299                 :            : static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
     300                 :            : 
     301                 :            : static DEFINE_MUTEX(wq_pool_mutex);     /* protects pools and workqueues list */
     302                 :            : static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
     303                 :            : static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
     304                 :            : static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
     305                 :            : 
     306                 :            : static LIST_HEAD(workqueues);           /* PR: list of all workqueues */
     307                 :            : static bool workqueue_freezing;         /* PL: have wqs started freezing? */
     308                 :            : 
     309                 :            : /* PL: allowable cpus for unbound wqs and work items */
     310                 :            : static cpumask_var_t wq_unbound_cpumask;
     311                 :            : 
      312                 :            : /* the CPU to which unbound work was last round-robin scheduled from this CPU */
     313                 :            : static DEFINE_PER_CPU(int, wq_rr_cpu_last);
     314                 :            : 
     315                 :            : /*
     316                 :            :  * Local execution of unbound work items is no longer guaranteed.  The
     317                 :            :  * following always forces round-robin CPU selection on unbound work items
     318                 :            :  * to uncover usages which depend on it.
     319                 :            :  */
     320                 :            : #ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
     321                 :            : static bool wq_debug_force_rr_cpu = true;
     322                 :            : #else
     323                 :            : static bool wq_debug_force_rr_cpu = false;
     324                 :            : #endif
     325                 :            : module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
     326                 :            : 
     327                 :            : /* the per-cpu worker pools */
     328                 :            : static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
     329                 :            : 
     330                 :            : static DEFINE_IDR(worker_pool_idr);     /* PR: idr of all pools */
     331                 :            : 
     332                 :            : /* PL: hash of all unbound pools keyed by pool->attrs */
     333                 :            : static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
     334                 :            : 
     335                 :            : /* I: attributes used when instantiating standard unbound pools on demand */
     336                 :            : static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
     337                 :            : 
     338                 :            : /* I: attributes used when instantiating ordered pools on demand */
     339                 :            : static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
     340                 :            : 
     341                 :            : struct workqueue_struct *system_wq __read_mostly;
     342                 :            : EXPORT_SYMBOL(system_wq);
     343                 :            : struct workqueue_struct *system_highpri_wq __read_mostly;
     344                 :            : EXPORT_SYMBOL_GPL(system_highpri_wq);
     345                 :            : struct workqueue_struct *system_long_wq __read_mostly;
     346                 :            : EXPORT_SYMBOL_GPL(system_long_wq);
     347                 :            : struct workqueue_struct *system_unbound_wq __read_mostly;
     348                 :            : EXPORT_SYMBOL_GPL(system_unbound_wq);
     349                 :            : struct workqueue_struct *system_freezable_wq __read_mostly;
     350                 :            : EXPORT_SYMBOL_GPL(system_freezable_wq);
     351                 :            : struct workqueue_struct *system_power_efficient_wq __read_mostly;
     352                 :            : EXPORT_SYMBOL_GPL(system_power_efficient_wq);
     353                 :            : struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
     354                 :            : EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
     355                 :            : 
     356                 :            : static int worker_thread(void *__worker);
     357                 :            : static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
     358                 :            : 
     359                 :            : #define CREATE_TRACE_POINTS
     360                 :            : #include <trace/events/workqueue.h>
     361                 :            : 
     362                 :            : #define assert_rcu_or_pool_mutex()                                      \
     363                 :            :         RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
     364                 :            :                          !lockdep_is_held(&wq_pool_mutex),          \
     365                 :            :                          "RCU or wq_pool_mutex should be held")
     366                 :            : 
     367                 :            : #define assert_rcu_or_wq_mutex(wq)                                      \
     368                 :            :         RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
     369                 :            :                          !lockdep_is_held(&wq->mutex),                   \
     370                 :            :                          "RCU or wq->mutex should be held")
     371                 :            : 
     372                 :            : #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                        \
     373                 :            :         RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
     374                 :            :                          !lockdep_is_held(&wq->mutex) &&         \
     375                 :            :                          !lockdep_is_held(&wq_pool_mutex),          \
     376                 :            :                          "RCU, wq->mutex or wq_pool_mutex should be held")
     377                 :            : 
     378                 :            : #define for_each_cpu_worker_pool(pool, cpu)                             \
     379                 :            :         for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];           \
     380                 :            :              (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
     381                 :            :              (pool)++)
     382                 :            : 
     383                 :            : /**
     384                 :            :  * for_each_pool - iterate through all worker_pools in the system
     385                 :            :  * @pool: iteration cursor
     386                 :            :  * @pi: integer used for iteration
     387                 :            :  *
     388                 :            :  * This must be called either with wq_pool_mutex held or RCU read
     389                 :            :  * locked.  If the pool needs to be used beyond the locking in effect, the
     390                 :            :  * caller is responsible for guaranteeing that the pool stays online.
     391                 :            :  *
     392                 :            :  * The if/else clause exists only for the lockdep assertion and can be
     393                 :            :  * ignored.
     394                 :            :  */
     395                 :            : #define for_each_pool(pool, pi)                                         \
     396                 :            :         idr_for_each_entry(&worker_pool_idr, pool, pi)                      \
     397                 :            :                 if (({ assert_rcu_or_pool_mutex(); false; })) { }       \
     398                 :            :                 else
     399                 :            : 
     400                 :            : /**
     401                 :            :  * for_each_pool_worker - iterate through all workers of a worker_pool
     402                 :            :  * @worker: iteration cursor
     403                 :            :  * @pool: worker_pool to iterate workers of
     404                 :            :  *
      405                 :            :  * This must be called with wq_pool_attach_mutex held.
     406                 :            :  *
     407                 :            :  * The if/else clause exists only for the lockdep assertion and can be
     408                 :            :  * ignored.
     409                 :            :  */
     410                 :            : #define for_each_pool_worker(worker, pool)                              \
     411                 :            :         list_for_each_entry((worker), &(pool)->workers, node)            \
     412                 :            :                 if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
     413                 :            :                 else
     414                 :            : 
     415                 :            : /**
     416                 :            :  * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
     417                 :            :  * @pwq: iteration cursor
     418                 :            :  * @wq: the target workqueue
     419                 :            :  *
     420                 :            :  * This must be called either with wq->mutex held or RCU read locked.
     421                 :            :  * If the pwq needs to be used beyond the locking in effect, the caller is
     422                 :            :  * responsible for guaranteeing that the pwq stays online.
     423                 :            :  *
     424                 :            :  * The if/else clause exists only for the lockdep assertion and can be
     425                 :            :  * ignored.
     426                 :            :  */
     427                 :            : #define for_each_pwq(pwq, wq)                                           \
     428                 :            :         list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,           \
     429                 :            :                                 lockdep_is_held(&wq->mutex))             \
     430                 :            :                 if (({ assert_rcu_or_wq_mutex(wq); false; })) { }       \
     431                 :            :                 else
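
/*
 * Typical usage of the iterator above (illustrative; inspect_pwq() is a
 * hypothetical helper): when wq->mutex is not held, the walk is done under
 * the RCU read lock,
 *
 *	rcu_read_lock();
 *	for_each_pwq(pwq, wq)
 *		inspect_pwq(pwq);
 *	rcu_read_unlock();
 */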
     432                 :            : 
     433                 :            : #ifdef CONFIG_DEBUG_OBJECTS_WORK
     434                 :            : 
     435                 :            : static struct debug_obj_descr work_debug_descr;
     436                 :            : 
     437                 :            : static void *work_debug_hint(void *addr)
     438                 :            : {
     439                 :            :         return ((struct work_struct *) addr)->func;
     440                 :            : }
     441                 :            : 
     442                 :            : static bool work_is_static_object(void *addr)
     443                 :            : {
     444                 :            :         struct work_struct *work = addr;
     445                 :            : 
     446                 :            :         return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
     447                 :            : }
     448                 :            : 
     449                 :            : /*
     450                 :            :  * fixup_init is called when:
     451                 :            :  * - an active object is initialized
     452                 :            :  */
     453                 :            : static bool work_fixup_init(void *addr, enum debug_obj_state state)
     454                 :            : {
     455                 :            :         struct work_struct *work = addr;
     456                 :            : 
     457                 :            :         switch (state) {
     458                 :            :         case ODEBUG_STATE_ACTIVE:
     459                 :            :                 cancel_work_sync(work);
     460                 :            :                 debug_object_init(work, &work_debug_descr);
     461                 :            :                 return true;
     462                 :            :         default:
     463                 :            :                 return false;
     464                 :            :         }
     465                 :            : }
     466                 :            : 
     467                 :            : /*
     468                 :            :  * fixup_free is called when:
     469                 :            :  * - an active object is freed
     470                 :            :  */
     471                 :            : static bool work_fixup_free(void *addr, enum debug_obj_state state)
     472                 :            : {
     473                 :            :         struct work_struct *work = addr;
     474                 :            : 
     475                 :            :         switch (state) {
     476                 :            :         case ODEBUG_STATE_ACTIVE:
     477                 :            :                 cancel_work_sync(work);
     478                 :            :                 debug_object_free(work, &work_debug_descr);
     479                 :            :                 return true;
     480                 :            :         default:
     481                 :            :                 return false;
     482                 :            :         }
     483                 :            : }
     484                 :            : 
     485                 :            : static struct debug_obj_descr work_debug_descr = {
     486                 :            :         .name           = "work_struct",
     487                 :            :         .debug_hint     = work_debug_hint,
     488                 :            :         .is_static_object = work_is_static_object,
     489                 :            :         .fixup_init     = work_fixup_init,
     490                 :            :         .fixup_free     = work_fixup_free,
     491                 :            : };
     492                 :            : 
     493                 :            : static inline void debug_work_activate(struct work_struct *work)
     494                 :            : {
     495                 :            :         debug_object_activate(work, &work_debug_descr);
     496                 :            : }
     497                 :            : 
     498                 :            : static inline void debug_work_deactivate(struct work_struct *work)
     499                 :            : {
     500                 :            :         debug_object_deactivate(work, &work_debug_descr);
     501                 :            : }
     502                 :            : 
     503                 :            : void __init_work(struct work_struct *work, int onstack)
     504                 :            : {
     505                 :            :         if (onstack)
     506                 :            :                 debug_object_init_on_stack(work, &work_debug_descr);
     507                 :            :         else
     508                 :            :                 debug_object_init(work, &work_debug_descr);
     509                 :            : }
     510                 :            : EXPORT_SYMBOL_GPL(__init_work);
     511                 :            : 
     512                 :            : void destroy_work_on_stack(struct work_struct *work)
     513                 :            : {
     514                 :            :         debug_object_free(work, &work_debug_descr);
     515                 :            : }
     516                 :            : EXPORT_SYMBOL_GPL(destroy_work_on_stack);
     517                 :            : 
     518                 :            : void destroy_delayed_work_on_stack(struct delayed_work *work)
     519                 :            : {
     520                 :            :         destroy_timer_on_stack(&work->timer);
     521                 :            :         debug_object_free(&work->work, &work_debug_descr);
     522                 :            : }
     523                 :            : EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
     524                 :            : 
     525                 :            : #else
     526                 :            : static inline void debug_work_activate(struct work_struct *work) { }
     527                 :            : static inline void debug_work_deactivate(struct work_struct *work) { }
     528                 :            : #endif
     529                 :            : 
     530                 :            : /**
      531                 :            :  * worker_pool_assign_id - allocate ID and assign it to @pool
     532                 :            :  * @pool: the pool pointer of interest
     533                 :            :  *
      534                 :            :  * Returns 0 if an ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
     535                 :            :  * successfully, -errno on failure.
     536                 :            :  */
     537                 :          3 : static int worker_pool_assign_id(struct worker_pool *pool)
     538                 :            : {
     539                 :            :         int ret;
     540                 :            : 
     541                 :            :         lockdep_assert_held(&wq_pool_mutex);
     542                 :            : 
     543                 :          3 :         ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
     544                 :            :                         GFP_KERNEL);
     545                 :          3 :         if (ret >= 0) {
     546                 :          3 :                 pool->id = ret;
     547                 :          3 :                 return 0;
     548                 :            :         }
     549                 :            :         return ret;
     550                 :            : }
     551                 :            : 
     552                 :            : /**
     553                 :            :  * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
     554                 :            :  * @wq: the target workqueue
     555                 :            :  * @node: the node ID
     556                 :            :  *
     557                 :            :  * This must be called with any of wq_pool_mutex, wq->mutex or RCU
     558                 :            :  * read locked.
     559                 :            :  * If the pwq needs to be used beyond the locking in effect, the caller is
     560                 :            :  * responsible for guaranteeing that the pwq stays online.
     561                 :            :  *
     562                 :            :  * Return: The unbound pool_workqueue for @node.
     563                 :            :  */
     564                 :            : static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
     565                 :            :                                                   int node)
     566                 :            : {
     567                 :            :         assert_rcu_or_wq_mutex_or_pool_mutex(wq);
     568                 :            : 
     569                 :            :         /*
     570                 :            :          * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
     571                 :            :          * delayed item is pending.  The plan is to keep CPU -> NODE
     572                 :            :          * mapping valid and stable across CPU on/offlines.  Once that
     573                 :            :          * happens, this workaround can be removed.
     574                 :            :          */
     575                 :            :         if (unlikely(node == NUMA_NO_NODE))
     576                 :            :                 return wq->dfl_pwq;
     577                 :            : 
     578                 :          3 :         return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
     579                 :            : }
     580                 :            : 
     581                 :            : static unsigned int work_color_to_flags(int color)
     582                 :            : {
     583                 :          3 :         return color << WORK_STRUCT_COLOR_SHIFT;
     584                 :            : }
     585                 :            : 
     586                 :            : static int get_work_color(struct work_struct *work)
     587                 :            : {
     588                 :          3 :         return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
     589                 :            :                 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
     590                 :            : }
     591                 :            : 
     592                 :            : static int work_next_color(int color)
     593                 :            : {
     594                 :          3 :         return (color + 1) % WORK_NR_COLORS;
     595                 :            : }
     596                 :            : 
     597                 :            : /*
     598                 :            :  * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
     599                 :            :  * contain the pointer to the queued pwq.  Once execution starts, the flag
     600                 :            :  * is cleared and the high bits contain OFFQ flags and pool ID.
     601                 :            :  *
     602                 :            :  * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
     603                 :            :  * and clear_work_data() can be used to set the pwq, pool or clear
     604                 :            :  * work->data.  These functions should only be called while the work is
     605                 :            :  * owned - ie. while the PENDING bit is set.
     606                 :            :  *
     607                 :            :  * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
     608                 :            :  * corresponding to a work.  Pool is available once the work has been
     609                 :            :  * queued anywhere after initialization until it is sync canceled.  pwq is
     610                 :            :  * available only while the work item is queued.
     611                 :            :  *
     612                 :            :  * %WORK_OFFQ_CANCELING is used to mark a work item which is being
     613                 :            :  * canceled.  While being canceled, a work item may have its PENDING set
     614                 :            :  * but stay off timer and worklist for arbitrarily long and nobody should
     615                 :            :  * try to steal the PENDING bit.
     616                 :            :  */
     617                 :          3 : static inline void set_work_data(struct work_struct *work, unsigned long data,
     618                 :            :                                  unsigned long flags)
     619                 :            : {
     620                 :          3 :         WARN_ON_ONCE(!work_pending(work));
     621                 :          3 :         atomic_long_set(&work->data, data | flags | work_static(work));
     622                 :          3 : }
     623                 :            : 
     624                 :            : static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
     625                 :            :                          unsigned long extra_flags)
     626                 :            : {
     627                 :          3 :         set_work_data(work, (unsigned long)pwq,
     628                 :            :                       WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
     629                 :            : }
     630                 :            : 
     631                 :            : static void set_work_pool_and_keep_pending(struct work_struct *work,
     632                 :            :                                            int pool_id)
     633                 :            : {
     634                 :          3 :         set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
     635                 :            :                       WORK_STRUCT_PENDING);
     636                 :            : }
     637                 :            : 
     638                 :            : static void set_work_pool_and_clear_pending(struct work_struct *work,
     639                 :            :                                             int pool_id)
     640                 :            : {
     641                 :            :         /*
     642                 :            :          * The following wmb is paired with the implied mb in
     643                 :            :          * test_and_set_bit(PENDING) and ensures all updates to @work made
     644                 :            :          * here are visible to and precede any updates by the next PENDING
     645                 :            :          * owner.
     646                 :            :          */
     647                 :          3 :         smp_wmb();
     648                 :          3 :         set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
     649                 :            :         /*
     650                 :            :          * The following mb guarantees that previous clear of a PENDING bit
     651                 :            :          * will not be reordered with any speculative LOADS or STORES from
     652                 :            :          * work->current_func, which is executed afterwards.  This possible
     653                 :            :          * reordering can lead to a missed execution on attempt to queue
     654                 :            :          * the same @work.  E.g. consider this case:
     655                 :            :          *
     656                 :            :          *   CPU#0                         CPU#1
     657                 :            :          *   ----------------------------  --------------------------------
     658                 :            :          *
     659                 :            :          * 1  STORE event_indicated
     660                 :            :          * 2  queue_work_on() {
     661                 :            :          * 3    test_and_set_bit(PENDING)
     662                 :            :          * 4 }                             set_..._and_clear_pending() {
     663                 :            :          * 5                                 set_work_data() # clear bit
     664                 :            :          * 6                                 smp_mb()
     665                 :            :          * 7                               work->current_func() {
     666                 :            :          * 8                                  LOAD event_indicated
     667                 :            :          *                                 }
     668                 :            :          *
     669                 :            :          * Without an explicit full barrier speculative LOAD on line 8 can
     670                 :            :          * be executed before CPU#0 does STORE on line 1.  If that happens,
     671                 :            :          * CPU#0 observes the PENDING bit is still set and new execution of
      672                 :            :          * a @work is not queued, in the hope that CPU#1 will eventually
     673                 :            :          * finish the queued @work.  Meanwhile CPU#1 does not see
     674                 :            :          * event_indicated is set, because speculative LOAD was executed
     675                 :            :          * before actual STORE.
     676                 :            :          */
     677                 :          3 :         smp_mb();
     678                 :            : }
     679                 :            : 
     680                 :            : static void clear_work_data(struct work_struct *work)
     681                 :            : {
     682                 :          3 :         smp_wmb();      /* see set_work_pool_and_clear_pending() */
     683                 :          3 :         set_work_data(work, WORK_STRUCT_NO_POOL, 0);
     684                 :            : }
     685                 :            : 
     686                 :            : static struct pool_workqueue *get_work_pwq(struct work_struct *work)
     687                 :            : {
     688                 :            :         unsigned long data = atomic_long_read(&work->data);
     689                 :            : 
     690                 :          3 :         if (data & WORK_STRUCT_PWQ)
     691                 :          3 :                 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
     692                 :            :         else
     693                 :            :                 return NULL;
     694                 :            : }
     695                 :            : 
     696                 :            : /**
     697                 :            :  * get_work_pool - return the worker_pool a given work was associated with
     698                 :            :  * @work: the work item of interest
     699                 :            :  *
      700                 :            :  * Pools are created and destroyed under wq_pool_mutex, and read
      701                 :            :  * access is allowed under RCU read lock.  As such, this function should be
     702                 :            :  * called under wq_pool_mutex or inside of a rcu_read_lock() region.
     703                 :            :  *
     704                 :            :  * All fields of the returned pool are accessible as long as the above
     705                 :            :  * mentioned locking is in effect.  If the returned pool needs to be used
     706                 :            :  * beyond the critical section, the caller is responsible for ensuring the
     707                 :            :  * returned pool is and stays online.
     708                 :            :  *
     709                 :            :  * Return: The worker_pool @work was last associated with.  %NULL if none.
     710                 :            :  */
     711                 :          3 : static struct worker_pool *get_work_pool(struct work_struct *work)
     712                 :            : {
     713                 :          3 :         unsigned long data = atomic_long_read(&work->data);
     714                 :            :         int pool_id;
     715                 :            : 
     716                 :            :         assert_rcu_or_pool_mutex();
     717                 :            : 
     718                 :          3 :         if (data & WORK_STRUCT_PWQ)
     719                 :          3 :                 return ((struct pool_workqueue *)
     720                 :          3 :                         (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
     721                 :            : 
     722                 :          3 :         pool_id = data >> WORK_OFFQ_POOL_SHIFT;
     723                 :          3 :         if (pool_id == WORK_OFFQ_POOL_NONE)
     724                 :            :                 return NULL;
     725                 :            : 
     726                 :          3 :         return idr_find(&worker_pool_idr, pool_id);
     727                 :            : }
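
/*
 * Illustrative sketch, not part of workqueue.c proper: how a caller inside
 * this file could honor the locking rule documented above when it only
 * needs a short peek at the pool a work item was last associated with.
 * The helper name is hypothetical.
 */
static int example_peek_last_pool_id(struct work_struct *work)
{
	struct worker_pool *pool;
	int id = WORK_OFFQ_POOL_NONE;

	rcu_read_lock();		/* keeps the pool from being destroyed */
	pool = get_work_pool(work);
	if (pool)
		id = pool->id;		/* pool fields are stable inside the RCU section */
	rcu_read_unlock();

	return id;
}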
     728                 :            : 
     729                 :            : /**
     730                 :            :  * get_work_pool_id - return the worker pool ID a given work is associated with
     731                 :            :  * @work: the work item of interest
     732                 :            :  *
     733                 :            :  * Return: The worker_pool ID @work was last associated with.
     734                 :            :  * %WORK_OFFQ_POOL_NONE if none.
     735                 :            :  */
     736                 :            : static int get_work_pool_id(struct work_struct *work)
     737                 :            : {
     738                 :            :         unsigned long data = atomic_long_read(&work->data);
     739                 :            : 
     740                 :          3 :         if (data & WORK_STRUCT_PWQ)
     741                 :          0 :                 return ((struct pool_workqueue *)
     742                 :          0 :                         (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
     743                 :            : 
     744                 :          3 :         return data >> WORK_OFFQ_POOL_SHIFT;
     745                 :            : }
     746                 :            : 
     747                 :          3 : static void mark_work_canceling(struct work_struct *work)
     748                 :            : {
     749                 :          3 :         unsigned long pool_id = get_work_pool_id(work);
     750                 :            : 
     751                 :          3 :         pool_id <<= WORK_OFFQ_POOL_SHIFT;
     752                 :          3 :         set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
     753                 :          3 : }
     754                 :            : 
     755                 :            : static bool work_is_canceling(struct work_struct *work)
     756                 :            : {
     757                 :            :         unsigned long data = atomic_long_read(&work->data);
     758                 :            : 
     759                 :          3 :         return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
     760                 :            : }
     761                 :            : 
     762                 :            : /*
     763                 :            :  * Policy functions.  These define the policies on how the global worker
     764                 :            :  * pools are managed.  Unless noted otherwise, these functions assume that
     765                 :            :  * they're being called with pool->lock held.
     766                 :            :  */
     767                 :            : 
     768                 :            : static bool __need_more_worker(struct worker_pool *pool)
     769                 :            : {
     770                 :            :         return !atomic_read(&pool->nr_running);
     771                 :            : }
     772                 :            : 
     773                 :            : /*
     774                 :            :  * Need to wake up a worker?  Called from anything but currently
     775                 :            :  * running workers.
     776                 :            :  *
     777                 :            :  * Note that, because unbound workers never contribute to nr_running, this
     778                 :            :  * function will always return %true for unbound pools as long as the
     779                 :            :  * worklist isn't empty.
     780                 :            :  */
     781                 :            : static bool need_more_worker(struct worker_pool *pool)
     782                 :            : {
     783                 :          3 :         return !list_empty(&pool->worklist) && __need_more_worker(pool);
     784                 :            : }
     785                 :            : 
     786                 :            : /* Can I start working?  Called from busy but !running workers. */
     787                 :            : static bool may_start_working(struct worker_pool *pool)
     788                 :            : {
     789                 :          3 :         return pool->nr_idle;
     790                 :            : }
     791                 :            : 
     792                 :            : /* Do I need to keep working?  Called from currently running workers. */
     793                 :            : static bool keep_working(struct worker_pool *pool)
     794                 :            : {
     795                 :          3 :         return !list_empty(&pool->worklist) &&
     796                 :          3 :                 atomic_read(&pool->nr_running) <= 1;
     797                 :            : }
     798                 :            : 
     799                 :            : /* Do we need a new worker?  Called from manager. */
     800                 :            : static bool need_to_create_worker(struct worker_pool *pool)
     801                 :            : {
     802                 :          3 :         return need_more_worker(pool) && !may_start_working(pool);
     803                 :            : }
     804                 :            : 
     805                 :            : /* Do we have too many workers and should some go away? */
     806                 :            : static bool too_many_workers(struct worker_pool *pool)
     807                 :            : {
     808                 :          3 :         bool managing = pool->flags & POOL_MANAGER_ACTIVE;
     809                 :          3 :         int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
     810                 :          3 :         int nr_busy = pool->nr_workers - nr_idle;
     811                 :            : 
     812                 :          3 :         return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
     813                 :            : }
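
/*
 * Worked example for the check above, assuming MAX_IDLE_WORKERS_RATIO is 4
 * (its current in-tree value): with nr_busy == 8, nr_idle == 3 gives
 * (3 - 2) * 4 = 4 < 8, so the pool keeps its idle workers; at nr_idle == 4,
 * (4 - 2) * 4 = 8 >= 8 and too_many_workers() returns true, allowing the
 * idle timer to start trimming workers beyond the 2-worker floor.
 */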
     814                 :            : 
     815                 :            : /*
     816                 :            :  * Wake up functions.
     817                 :            :  */
     818                 :            : 
     819                 :            : /* Return the first idle worker.  Safe with preemption disabled */
     820                 :            : static struct worker *first_idle_worker(struct worker_pool *pool)
     821                 :            : {
     822                 :          3 :         if (unlikely(list_empty(&pool->idle_list)))
     823                 :            :                 return NULL;
     824                 :            : 
     825                 :          3 :         return list_first_entry(&pool->idle_list, struct worker, entry);
     826                 :            : }
     827                 :            : 
     828                 :            : /**
     829                 :            :  * wake_up_worker - wake up an idle worker
     830                 :            :  * @pool: worker pool to wake worker from
     831                 :            :  *
     832                 :            :  * Wake up the first idle worker of @pool.
     833                 :            :  *
     834                 :            :  * CONTEXT:
     835                 :            :  * spin_lock_irq(pool->lock).
     836                 :            :  */
     837                 :          3 : static void wake_up_worker(struct worker_pool *pool)
     838                 :            : {
     839                 :            :         struct worker *worker = first_idle_worker(pool);
     840                 :            : 
     841                 :          3 :         if (likely(worker))
     842                 :          3 :                 wake_up_process(worker->task);
     843                 :          3 : }
     844                 :            : 
     845                 :            : /**
     846                 :            :  * wq_worker_running - a worker is running again
     847                 :            :  * @task: task waking up
     848                 :            :  *
     849                 :            :  * This function is called when a worker returns from schedule()
     850                 :            :  */
     851                 :          3 : void wq_worker_running(struct task_struct *task)
     852                 :            : {
     853                 :          3 :         struct worker *worker = kthread_data(task);
     854                 :            : 
     855                 :          3 :         if (!worker->sleeping)
     856                 :          3 :                 return;
     857                 :          3 :         if (!(worker->flags & WORKER_NOT_RUNNING))
     858                 :          3 :                 atomic_inc(&worker->pool->nr_running);
     859                 :          3 :         worker->sleeping = 0;
     860                 :            : }
     861                 :            : 
     862                 :            : /**
     863                 :            :  * wq_worker_sleeping - a worker is going to sleep
     864                 :            :  * @task: task going to sleep
     865                 :            :  *
     866                 :            :  * This function is called from schedule() when a busy worker is
     867                 :            :  * going to sleep.
     868                 :            :  */
     869                 :          3 : void wq_worker_sleeping(struct task_struct *task)
     870                 :            : {
     871                 :          3 :         struct worker *next, *worker = kthread_data(task);
     872                 :            :         struct worker_pool *pool;
     873                 :            : 
     874                 :            :         /*
     875                 :            :          * Rescuers, which may not have all the fields set up like normal
      876                 :            :          * workers, also reach here; let's not access anything before
     877                 :            :          * checking NOT_RUNNING.
     878                 :            :          */
     879                 :          3 :         if (worker->flags & WORKER_NOT_RUNNING)
     880                 :            :                 return;
     881                 :            : 
     882                 :          3 :         pool = worker->pool;
     883                 :            : 
     884                 :          3 :         if (WARN_ON_ONCE(worker->sleeping))
     885                 :            :                 return;
     886                 :            : 
     887                 :          3 :         worker->sleeping = 1;
     888                 :            :         spin_lock_irq(&pool->lock);
     889                 :            : 
     890                 :            :         /*
     891                 :            :          * The counterpart of the following dec_and_test, implied mb,
     892                 :            :          * worklist not empty test sequence is in insert_work().
     893                 :            :          * Please read comment there.
     894                 :            :          *
     895                 :            :          * NOT_RUNNING is clear.  This means that we're bound to and
     896                 :            :          * running on the local cpu w/ rq lock held and preemption
      897                 :            :          * disabled, which in turn means that nothing else could be
     898                 :            :          * manipulating idle_list, so dereferencing idle_list without pool
     899                 :            :          * lock is safe.
     900                 :            :          */
     901                 :          3 :         if (atomic_dec_and_test(&pool->nr_running) &&
     902                 :          3 :             !list_empty(&pool->worklist)) {
     903                 :            :                 next = first_idle_worker(pool);
     904                 :          3 :                 if (next)
     905                 :          3 :                         wake_up_process(next->task);
     906                 :            :         }
     907                 :            :         spin_unlock_irq(&pool->lock);
     908                 :            : }
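
/*
 * Pairing sketch (an assumption based on how kernel/sched/core.c uses these
 * hooks, not code from this file): before a PF_WQ_WORKER task blocks, the
 * scheduler calls wq_worker_sleeping() so the pool can wake another worker
 * if work is pending, and once the task runs again it calls
 * wq_worker_running() to restore the nr_running accounting:
 *
 *	if (tsk->flags & PF_WQ_WORKER)
 *		wq_worker_sleeping(tsk);
 *	... tsk blocks in __schedule() ...
 *	if (tsk->flags & PF_WQ_WORKER)
 *		wq_worker_running(tsk);
 */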
     909                 :            : 
     910                 :            : /**
     911                 :            :  * wq_worker_last_func - retrieve worker's last work function
     912                 :            :  * @task: Task to retrieve last work function of.
     913                 :            :  *
     914                 :            :  * Determine the last function a worker executed. This is called from
     915                 :            :  * the scheduler to get a worker's last known identity.
     916                 :            :  *
     917                 :            :  * CONTEXT:
     918                 :            :  * spin_lock_irq(rq->lock)
     919                 :            :  *
     920                 :            :  * This function is called during schedule() when a kworker is going
     921                 :            :  * to sleep. It's used by psi to identify aggregation workers during
      922                 :            :  * dequeuing, to allow periodic aggregation to shut off when that
     923                 :            :  * worker is the last task in the system or cgroup to go to sleep.
     924                 :            :  *
     925                 :            :  * As this function doesn't involve any workqueue-related locking, it
     926                 :            :  * only returns stable values when called from inside the scheduler's
     927                 :            :  * queuing and dequeuing paths, when @task, which must be a kworker,
      928                 :            :  * is guaranteed to not be processing any work items.
     929                 :            :  *
     930                 :            :  * Return:
     931                 :            :  * The last work function %current executed as a worker, NULL if it
     932                 :            :  * hasn't executed any work yet.
     933                 :            :  */
     934                 :          0 : work_func_t wq_worker_last_func(struct task_struct *task)
     935                 :            : {
     936                 :          0 :         struct worker *worker = kthread_data(task);
     937                 :            : 
     938                 :          0 :         return worker->last_func;
     939                 :            : }
     940                 :            : 
     941                 :            : /**
     942                 :            :  * worker_set_flags - set worker flags and adjust nr_running accordingly
     943                 :            :  * @worker: self
     944                 :            :  * @flags: flags to set
     945                 :            :  *
     946                 :            :  * Set @flags in @worker->flags and adjust nr_running accordingly.
     947                 :            :  *
     948                 :            :  * CONTEXT:
     949                 :            :  * spin_lock_irq(pool->lock)
     950                 :            :  */
     951                 :          3 : static inline void worker_set_flags(struct worker *worker, unsigned int flags)
     952                 :            : {
     953                 :          3 :         struct worker_pool *pool = worker->pool;
     954                 :            : 
     955                 :          3 :         WARN_ON_ONCE(worker->task != current);
     956                 :            : 
     957                 :            :         /* If transitioning into NOT_RUNNING, adjust nr_running. */
     958                 :          3 :         if ((flags & WORKER_NOT_RUNNING) &&
     959                 :          3 :             !(worker->flags & WORKER_NOT_RUNNING)) {
     960                 :          3 :                 atomic_dec(&pool->nr_running);
     961                 :            :         }
     962                 :            : 
     963                 :          3 :         worker->flags |= flags;
     964                 :          3 : }
     965                 :            : 
     966                 :            : /**
     967                 :            :  * worker_clr_flags - clear worker flags and adjust nr_running accordingly
     968                 :            :  * @worker: self
     969                 :            :  * @flags: flags to clear
     970                 :            :  *
     971                 :            :  * Clear @flags in @worker->flags and adjust nr_running accordingly.
     972                 :            :  *
     973                 :            :  * CONTEXT:
     974                 :            :  * spin_lock_irq(pool->lock)
     975                 :            :  */
     976                 :          3 : static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
     977                 :            : {
     978                 :          3 :         struct worker_pool *pool = worker->pool;
     979                 :          3 :         unsigned int oflags = worker->flags;
     980                 :            : 
     981                 :          3 :         WARN_ON_ONCE(worker->task != current);
     982                 :            : 
     983                 :          3 :         worker->flags &= ~flags;
     984                 :            : 
     985                 :            :         /*
     986                 :            :          * If transitioning out of NOT_RUNNING, increment nr_running.  Note
      987                 :            :          * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is a mask
     988                 :            :          * of multiple flags, not a single flag.
     989                 :            :          */
     990                 :          3 :         if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
     991                 :          3 :                 if (!(worker->flags & WORKER_NOT_RUNNING))
     992                 :          3 :                         atomic_inc(&pool->nr_running);
     993                 :          3 : }
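
/*
 * Worked example of why the nested NOT_RUNNING test above is needed
 * (WORKER_NOT_RUNNING covers several flags, e.g. WORKER_PREP and
 * WORKER_CPU_INTENSIVE): if a worker has both WORKER_PREP and
 * WORKER_CPU_INTENSIVE set and only WORKER_PREP is cleared, the worker is
 * still NOT_RUNNING through WORKER_CPU_INTENSIVE, so nr_running must not be
 * incremented yet; the inner !(worker->flags & WORKER_NOT_RUNNING) check is
 * what prevents that premature increment.
 */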
     994                 :            : 
     995                 :            : /**
     996                 :            :  * find_worker_executing_work - find worker which is executing a work
     997                 :            :  * @pool: pool of interest
     998                 :            :  * @work: work to find worker for
     999                 :            :  *
    1000                 :            :  * Find a worker which is executing @work on @pool by searching
    1001                 :            :  * @pool->busy_hash which is keyed by the address of @work.  For a worker
    1002                 :            :  * to match, its current execution should match the address of @work and
    1003                 :            :  * its work function.  This is to avoid unwanted dependency between
    1004                 :            :  * unrelated work executions through a work item being recycled while still
    1005                 :            :  * being executed.
    1006                 :            :  *
    1007                 :            :  * This is a bit tricky.  A work item may be freed once its execution
    1008                 :            :  * starts and nothing prevents the freed area from being recycled for
    1009                 :            :  * another work item.  If the same work item address ends up being reused
    1010                 :            :  * before the original execution finishes, workqueue will identify the
    1011                 :            :  * recycled work item as currently executing and make it wait until the
    1012                 :            :  * current execution finishes, introducing an unwanted dependency.
    1013                 :            :  *
    1014                 :            :  * This function checks the work item address and work function to avoid
    1015                 :            :  * false positives.  Note that this isn't complete as one may construct a
     1016                 :            :  * work function which can introduce a dependency onto itself through a
     1017                 :            :  * recycled work item.  Well, if somebody wants to shoot themselves in the
     1018                 :            :  * foot that badly, there's only so much we can do, and if such a deadlock
    1019                 :            :  * actually occurs, it should be easy to locate the culprit work function.
    1020                 :            :  *
    1021                 :            :  * CONTEXT:
    1022                 :            :  * spin_lock_irq(pool->lock).
    1023                 :            :  *
    1024                 :            :  * Return:
    1025                 :            :  * Pointer to worker which is executing @work if found, %NULL
    1026                 :            :  * otherwise.
    1027                 :            :  */
    1028                 :          3 : static struct worker *find_worker_executing_work(struct worker_pool *pool,
    1029                 :            :                                                  struct work_struct *work)
    1030                 :            : {
    1031                 :            :         struct worker *worker;
    1032                 :            : 
    1033                 :          3 :         hash_for_each_possible(pool->busy_hash, worker, hentry,
    1034                 :            :                                (unsigned long)work)
    1035                 :          3 :                 if (worker->current_work == work &&
    1036                 :          3 :                     worker->current_func == work->func)
    1037                 :          3 :                         return worker;
    1038                 :            : 
    1039                 :            :         return NULL;
    1040                 :            : }
    1041                 :            : 
    1042                 :            : /**
    1043                 :            :  * move_linked_works - move linked works to a list
    1044                 :            :  * @work: start of series of works to be scheduled
    1045                 :            :  * @head: target list to append @work to
    1046                 :            :  * @nextp: out parameter for nested worklist walking
    1047                 :            :  *
    1048                 :            :  * Schedule linked works starting from @work to @head.  Work series to
    1049                 :            :  * be scheduled starts at @work and includes any consecutive work with
    1050                 :            :  * WORK_STRUCT_LINKED set in its predecessor.
    1051                 :            :  *
    1052                 :            :  * If @nextp is not NULL, it's updated to point to the next work of
    1053                 :            :  * the last scheduled work.  This allows move_linked_works() to be
    1054                 :            :  * nested inside outer list_for_each_entry_safe().
    1055                 :            :  *
    1056                 :            :  * CONTEXT:
    1057                 :            :  * spin_lock_irq(pool->lock).
    1058                 :            :  */
    1059                 :          3 : static void move_linked_works(struct work_struct *work, struct list_head *head,
    1060                 :            :                               struct work_struct **nextp)
    1061                 :            : {
    1062                 :            :         struct work_struct *n;
    1063                 :            : 
    1064                 :            :         /*
     1065                 :            :          * A linked worklist always ends before the end of the list;
     1066                 :            :          * use NULL for the list head.
    1067                 :            :          */
    1068                 :          3 :         list_for_each_entry_safe_from(work, n, NULL, entry) {
    1069                 :            :                 list_move_tail(&work->entry, head);
    1070                 :          3 :                 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
    1071                 :            :                         break;
    1072                 :            :         }
    1073                 :            : 
    1074                 :            :         /*
    1075                 :            :          * If we're already inside safe list traversal and have moved
    1076                 :            :          * multiple works to the scheduled queue, the next position
    1077                 :            :          * needs to be updated.
    1078                 :            :          */
    1079                 :          3 :         if (nextp)
    1080                 :          1 :                 *nextp = n;
    1081                 :          3 : }
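
/*
 * Illustrative sketch, not from this file: nesting move_linked_works()
 * inside an outer list_for_each_entry_safe() and using @nextp so that the
 * outer walk resumes after the whole linked series that was just moved.
 * The list arguments are hypothetical; pool->lock must be held as usual.
 */
static void example_requeue_linked(struct list_head *src, struct list_head *dst)
{
	struct work_struct *work, *n;

	list_for_each_entry_safe(work, n, src, entry) {
		/* moves @work plus any WORK_STRUCT_LINKED successors */
		move_linked_works(work, dst, &n);
	}
}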
    1082                 :            : 
    1083                 :            : /**
    1084                 :            :  * get_pwq - get an extra reference on the specified pool_workqueue
    1085                 :            :  * @pwq: pool_workqueue to get
    1086                 :            :  *
    1087                 :            :  * Obtain an extra reference on @pwq.  The caller should guarantee that
    1088                 :            :  * @pwq has positive refcnt and be holding the matching pool->lock.
    1089                 :            :  */
    1090                 :          3 : static void get_pwq(struct pool_workqueue *pwq)
    1091                 :            : {
    1092                 :            :         lockdep_assert_held(&pwq->pool->lock);
    1093                 :          3 :         WARN_ON_ONCE(pwq->refcnt <= 0);
    1094                 :          3 :         pwq->refcnt++;
    1095                 :          3 : }
    1096                 :            : 
    1097                 :            : /**
    1098                 :            :  * put_pwq - put a pool_workqueue reference
    1099                 :            :  * @pwq: pool_workqueue to put
    1100                 :            :  *
    1101                 :            :  * Drop a reference of @pwq.  If its refcnt reaches zero, schedule its
    1102                 :            :  * destruction.  The caller should be holding the matching pool->lock.
    1103                 :            :  */
    1104                 :          3 : static void put_pwq(struct pool_workqueue *pwq)
    1105                 :            : {
    1106                 :            :         lockdep_assert_held(&pwq->pool->lock);
    1107                 :          3 :         if (likely(--pwq->refcnt))
    1108                 :            :                 return;
    1109                 :          2 :         if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
    1110                 :            :                 return;
    1111                 :            :         /*
    1112                 :            :          * @pwq can't be released under pool->lock, bounce to
    1113                 :            :          * pwq_unbound_release_workfn().  This never recurses on the same
    1114                 :            :          * pool->lock as this path is taken only for unbound workqueues and
    1115                 :            :          * the release work item is scheduled on a per-cpu workqueue.  To
    1116                 :            :          * avoid lockdep warning, unbound pool->locks are given lockdep
    1117                 :            :          * subclass of 1 in get_unbound_pool().
    1118                 :            :          */
    1119                 :          2 :         schedule_work(&pwq->unbound_release_work);
    1120                 :            : }
    1121                 :            : 
    1122                 :            : /**
    1123                 :            :  * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
    1124                 :            :  * @pwq: pool_workqueue to put (can be %NULL)
    1125                 :            :  *
    1126                 :            :  * put_pwq() with locking.  This function also allows %NULL @pwq.
    1127                 :            :  */
    1128                 :          3 : static void put_pwq_unlocked(struct pool_workqueue *pwq)
    1129                 :            : {
    1130                 :          3 :         if (pwq) {
    1131                 :            :                 /*
    1132                 :            :                  * As both pwqs and pools are RCU protected, the
    1133                 :            :                  * following lock operations are safe.
    1134                 :            :                  */
    1135                 :          2 :                 spin_lock_irq(&pwq->pool->lock);
    1136                 :          2 :                 put_pwq(pwq);
    1137                 :          2 :                 spin_unlock_irq(&pwq->pool->lock);
    1138                 :            :         }
    1139                 :          3 : }
    1140                 :            : 
    1141                 :          3 : static void pwq_activate_delayed_work(struct work_struct *work)
    1142                 :            : {
    1143                 :            :         struct pool_workqueue *pwq = get_work_pwq(work);
    1144                 :            : 
    1145                 :          3 :         trace_workqueue_activate_work(work);
    1146                 :          3 :         if (list_empty(&pwq->pool->worklist))
    1147                 :          3 :                 pwq->pool->watchdog_ts = jiffies;
    1148                 :          3 :         move_linked_works(work, &pwq->pool->worklist, NULL);
    1149                 :            :         __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
    1150                 :          3 :         pwq->nr_active++;
    1151                 :          3 : }
    1152                 :            : 
    1153                 :            : static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
    1154                 :            : {
    1155                 :          3 :         struct work_struct *work = list_first_entry(&pwq->delayed_works,
    1156                 :            :                                                     struct work_struct, entry);
    1157                 :            : 
    1158                 :          3 :         pwq_activate_delayed_work(work);
    1159                 :            : }
    1160                 :            : 
    1161                 :            : /**
    1162                 :            :  * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
    1163                 :            :  * @pwq: pwq of interest
    1164                 :            :  * @color: color of work which left the queue
    1165                 :            :  *
    1166                 :            :  * A work either has completed or is removed from pending queue,
    1167                 :            :  * decrement nr_in_flight of its pwq and handle workqueue flushing.
    1168                 :            :  *
    1169                 :            :  * CONTEXT:
    1170                 :            :  * spin_lock_irq(pool->lock).
    1171                 :            :  */
    1172                 :          3 : static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
    1173                 :            : {
    1174                 :            :         /* uncolored work items don't participate in flushing or nr_active */
    1175                 :          3 :         if (color == WORK_NO_COLOR)
    1176                 :            :                 goto out_put;
    1177                 :            : 
    1178                 :          3 :         pwq->nr_in_flight[color]--;
    1179                 :            : 
    1180                 :          3 :         pwq->nr_active--;
    1181                 :          3 :         if (!list_empty(&pwq->delayed_works)) {
    1182                 :            :                 /* one down, submit a delayed one */
    1183                 :          3 :                 if (pwq->nr_active < pwq->max_active)
    1184                 :            :                         pwq_activate_first_delayed(pwq);
    1185                 :            :         }
    1186                 :            : 
    1187                 :            :         /* is flush in progress and are we at the flushing tip? */
    1188                 :          3 :         if (likely(pwq->flush_color != color))
    1189                 :            :                 goto out_put;
    1190                 :            : 
    1191                 :            :         /* are there still in-flight works? */
    1192                 :          0 :         if (pwq->nr_in_flight[color])
    1193                 :            :                 goto out_put;
    1194                 :            : 
    1195                 :            :         /* this pwq is done, clear flush_color */
    1196                 :          0 :         pwq->flush_color = -1;
    1197                 :            : 
    1198                 :            :         /*
    1199                 :            :          * If this was the last pwq, wake up the first flusher.  It
    1200                 :            :          * will handle the rest.
    1201                 :            :          */
    1202                 :          0 :         if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
    1203                 :          0 :                 complete(&pwq->wq->first_flusher->done);
    1204                 :            : out_put:
    1205                 :          3 :         put_pwq(pwq);
    1206                 :          3 : }
    1207                 :            : 
    1208                 :            : /**
    1209                 :            :  * try_to_grab_pending - steal work item from worklist and disable irq
    1210                 :            :  * @work: work item to steal
    1211                 :            :  * @is_dwork: @work is a delayed_work
    1212                 :            :  * @flags: place to store irq state
    1213                 :            :  *
    1214                 :            :  * Try to grab PENDING bit of @work.  This function can handle @work in any
    1215                 :            :  * stable state - idle, on timer or on worklist.
    1216                 :            :  *
    1217                 :            :  * Return:
    1218                 :            :  *  1           if @work was pending and we successfully stole PENDING
    1219                 :            :  *  0           if @work was idle and we claimed PENDING
    1220                 :            :  *  -EAGAIN     if PENDING couldn't be grabbed at the moment, safe to busy-retry
    1221                 :            :  *  -ENOENT     if someone else is canceling @work, this state may persist
    1222                 :            :  *              for arbitrarily long
    1223                 :            :  *
    1224                 :            :  * Note:
    1225                 :            :  * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
    1226                 :            :  * interrupted while holding PENDING and @work off queue, irq must be
    1227                 :            :  * disabled on entry.  This, combined with delayed_work->timer being
     1228                 :            :  * irqsafe, ensures that we return -EAGAIN for a finite, short period of time.
    1229                 :            :  *
    1230                 :            :  * On successful return, >= 0, irq is disabled and the caller is
    1231                 :            :  * responsible for releasing it using local_irq_restore(*@flags).
    1232                 :            :  *
    1233                 :            :  * This function is safe to call from any context including IRQ handler.
    1234                 :            :  */
    1235                 :          3 : static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
    1236                 :            :                                unsigned long *flags)
    1237                 :            : {
    1238                 :            :         struct worker_pool *pool;
    1239                 :            :         struct pool_workqueue *pwq;
    1240                 :            : 
    1241                 :          3 :         local_irq_save(*flags);
    1242                 :            : 
    1243                 :            :         /* try to steal the timer if it exists */
    1244                 :          3 :         if (is_dwork) {
    1245                 :            :                 struct delayed_work *dwork = to_delayed_work(work);
    1246                 :            : 
    1247                 :            :                 /*
    1248                 :            :                  * dwork->timer is irqsafe.  If del_timer() fails, it's
    1249                 :            :                  * guaranteed that the timer is not queued anywhere and not
    1250                 :            :                  * running on the local CPU.
    1251                 :            :                  */
    1252                 :          3 :                 if (likely(del_timer(&dwork->timer)))
    1253                 :            :                         return 1;
    1254                 :            :         }
    1255                 :            : 
    1256                 :            :         /* try to claim PENDING the normal way */
    1257                 :          3 :         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
    1258                 :            :                 return 0;
    1259                 :            : 
    1260                 :            :         rcu_read_lock();
    1261                 :            :         /*
    1262                 :            :          * The queueing is in progress, or it is already queued. Try to
    1263                 :            :          * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
    1264                 :            :          */
    1265                 :          3 :         pool = get_work_pool(work);
    1266                 :          3 :         if (!pool)
    1267                 :            :                 goto fail;
    1268                 :            : 
    1269                 :            :         spin_lock(&pool->lock);
    1270                 :            :         /*
    1271                 :            :          * work->data is guaranteed to point to pwq only while the work
    1272                 :            :          * item is queued on pwq->wq, and both updating work->data to point
    1273                 :            :          * to pwq on queueing and to pool on dequeueing are done under
    1274                 :            :          * pwq->pool->lock.  This in turn guarantees that, if work->data
    1275                 :            :          * points to pwq which is associated with a locked pool, the work
    1276                 :            :          * item is currently queued on that pool.
    1277                 :            :          */
    1278                 :            :         pwq = get_work_pwq(work);
    1279                 :          3 :         if (pwq && pwq->pool == pool) {
    1280                 :            :                 debug_work_deactivate(work);
    1281                 :            : 
    1282                 :            :                 /*
    1283                 :            :                  * A delayed work item cannot be grabbed directly because
    1284                 :            :                  * it might have linked NO_COLOR work items which, if left
    1285                 :            :                  * on the delayed_list, will confuse pwq->nr_active
    1286                 :            :                  * management later on and cause stall.  Make sure the work
     1287                 :            :          * management later on and cause a stall.  Make sure the work
    1288                 :            :                  */
    1289                 :          3 :                 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
    1290                 :          0 :                         pwq_activate_delayed_work(work);
    1291                 :            : 
    1292                 :          3 :                 list_del_init(&work->entry);
    1293                 :          3 :                 pwq_dec_nr_in_flight(pwq, get_work_color(work));
    1294                 :            : 
    1295                 :            :                 /* work->data points to pwq iff queued, point to pool */
    1296                 :          3 :                 set_work_pool_and_keep_pending(work, pool->id);
    1297                 :            : 
    1298                 :            :                 spin_unlock(&pool->lock);
    1299                 :            :                 rcu_read_unlock();
    1300                 :          3 :                 return 1;
    1301                 :            :         }
    1302                 :            :         spin_unlock(&pool->lock);
    1303                 :            : fail:
    1304                 :            :         rcu_read_unlock();
    1305                 :          3 :         local_irq_restore(*flags);
    1306                 :          3 :         if (work_is_canceling(work))
    1307                 :            :                 return -ENOENT;
    1308                 :          3 :         cpu_relax();
    1309                 :          3 :         return -EAGAIN;
    1310                 :            : }
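
/*
 * Illustrative sketch, not part of this file, of the usual calling pattern
 * for try_to_grab_pending(): busy-retry on -EAGAIN, give up on -ENOENT, and
 * on >= 0 the caller owns PENDING with IRQs disabled and must eventually
 * release the saved IRQ state.  A real cancel path (see __cancel_work_timer)
 * would additionally flush the work and clear its data afterwards.
 */
static bool example_grab_and_cancel(struct work_struct *work)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, false, &flags);
	} while (ret == -EAGAIN);

	if (ret < 0)			/* -ENOENT: someone else is canceling it */
		return false;

	mark_work_canceling(work);	/* PENDING is ours, note the cancel */
	local_irq_restore(flags);

	return true;
}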
    1311                 :            : 
    1312                 :            : /**
    1313                 :            :  * insert_work - insert a work into a pool
    1314                 :            :  * @pwq: pwq @work belongs to
    1315                 :            :  * @work: work to insert
    1316                 :            :  * @head: insertion point
    1317                 :            :  * @extra_flags: extra WORK_STRUCT_* flags to set
    1318                 :            :  *
    1319                 :            :  * Insert @work which belongs to @pwq after @head.  @extra_flags is or'd to
    1320                 :            :  * work_struct flags.
    1321                 :            :  *
    1322                 :            :  * CONTEXT:
    1323                 :            :  * spin_lock_irq(pool->lock).
    1324                 :            :  */
    1325                 :          3 : static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
    1326                 :            :                         struct list_head *head, unsigned int extra_flags)
    1327                 :            : {
    1328                 :          3 :         struct worker_pool *pool = pwq->pool;
    1329                 :            : 
    1330                 :            :         /* we own @work, set data and link */
    1331                 :            :         set_work_pwq(work, pwq, extra_flags);
    1332                 :          3 :         list_add_tail(&work->entry, head);
    1333                 :          3 :         get_pwq(pwq);
    1334                 :            : 
    1335                 :            :         /*
    1336                 :            :          * Ensure either wq_worker_sleeping() sees the above
    1337                 :            :          * list_add_tail() or we see zero nr_running to avoid workers lying
     1338                 :            :          * around lazily while there is work to be processed.
    1339                 :            :          */
    1340                 :          3 :         smp_mb();
    1341                 :            : 
    1342                 :          3 :         if (__need_more_worker(pool))
    1343                 :          3 :                 wake_up_worker(pool);
    1344                 :          3 : }
    1345                 :            : 
    1346                 :            : /*
     1347                 :            :  * Test whether the work item being queued originates from another work
     1348                 :            :  * item already executing on @wq.
    1349                 :            :  */
    1350                 :            : static bool is_chained_work(struct workqueue_struct *wq)
    1351                 :            : {
    1352                 :            :         struct worker *worker;
    1353                 :            : 
    1354                 :          0 :         worker = current_wq_worker();
    1355                 :            :         /*
    1356                 :            :          * Return %true iff I'm a worker executing a work item on @wq.  If
    1357                 :            :          * I'm @worker, it's safe to dereference it without locking.
    1358                 :            :          */
    1359                 :          0 :         return worker && worker->current_pwq->wq == wq;
    1360                 :            : }
    1361                 :            : 
    1362                 :            : /*
    1363                 :            :  * When queueing an unbound work item to a wq, prefer local CPU if allowed
    1364                 :            :  * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
    1365                 :            :  * avoid perturbing sensitive tasks.
    1366                 :            :  */
    1367                 :          3 : static int wq_select_unbound_cpu(int cpu)
    1368                 :            : {
    1369                 :            :         static bool printed_dbg_warning;
    1370                 :            :         int new_cpu;
    1371                 :            : 
    1372                 :          3 :         if (likely(!wq_debug_force_rr_cpu)) {
    1373                 :          3 :                 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
    1374                 :            :                         return cpu;
    1375                 :          0 :         } else if (!printed_dbg_warning) {
    1376                 :          0 :                 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
    1377                 :          0 :                 printed_dbg_warning = true;
    1378                 :            :         }
    1379                 :            : 
    1380                 :          0 :         if (cpumask_empty(wq_unbound_cpumask))
    1381                 :            :                 return cpu;
    1382                 :            : 
    1383                 :          0 :         new_cpu = __this_cpu_read(wq_rr_cpu_last);
    1384                 :          0 :         new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
    1385                 :          0 :         if (unlikely(new_cpu >= nr_cpu_ids)) {
    1386                 :          0 :                 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
    1387                 :          0 :                 if (unlikely(new_cpu >= nr_cpu_ids))
    1388                 :            :                         return cpu;
    1389                 :            :         }
    1390                 :          0 :         __this_cpu_write(wq_rr_cpu_last, new_cpu);
    1391                 :            : 
    1392                 :          0 :         return new_cpu;
    1393                 :            : }
    1394                 :            : 
    1395                 :          3 : static void __queue_work(int cpu, struct workqueue_struct *wq,
    1396                 :            :                          struct work_struct *work)
    1397                 :            : {
    1398                 :            :         struct pool_workqueue *pwq;
    1399                 :            :         struct worker_pool *last_pool;
    1400                 :            :         struct list_head *worklist;
    1401                 :            :         unsigned int work_flags;
    1402                 :          3 :         unsigned int req_cpu = cpu;
    1403                 :            : 
    1404                 :            :         /*
    1405                 :            :          * While a work item is PENDING && off queue, a task trying to
    1406                 :            :          * steal the PENDING will busy-loop waiting for it to either get
    1407                 :            :          * queued or lose PENDING.  Grabbing PENDING and queueing should
    1408                 :            :          * happen with IRQ disabled.
    1409                 :            :          */
    1410                 :            :         lockdep_assert_irqs_disabled();
    1411                 :            : 
    1412                 :            :         debug_work_activate(work);
    1413                 :            : 
    1414                 :            :         /* if draining, only works from the same workqueue are allowed */
    1415                 :          3 :         if (unlikely(wq->flags & __WQ_DRAINING) &&
    1416                 :          0 :             WARN_ON_ONCE(!is_chained_work(wq)))
    1417                 :          3 :                 return;
    1418                 :            :         rcu_read_lock();
    1419                 :            : retry:
    1420                 :            :         /* pwq which will be used unless @work is executing elsewhere */
    1421                 :          3 :         if (wq->flags & WQ_UNBOUND) {
    1422                 :          3 :                 if (req_cpu == WORK_CPU_UNBOUND)
    1423                 :          3 :                         cpu = wq_select_unbound_cpu(raw_smp_processor_id());
    1424                 :            :                 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
    1425                 :            :         } else {
    1426                 :          3 :                 if (req_cpu == WORK_CPU_UNBOUND)
    1427                 :          3 :                         cpu = raw_smp_processor_id();
    1428                 :          3 :                 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
    1429                 :            :         }
    1430                 :            : 
    1431                 :            :         /*
    1432                 :            :          * If @work was previously on a different pool, it might still be
    1433                 :            :          * running there, in which case the work needs to be queued on that
    1434                 :            :          * pool to guarantee non-reentrancy.
    1435                 :            :          */
    1436                 :          3 :         last_pool = get_work_pool(work);
    1437                 :          3 :         if (last_pool && last_pool != pwq->pool) {
    1438                 :            :                 struct worker *worker;
    1439                 :            : 
    1440                 :            :                 spin_lock(&last_pool->lock);
    1441                 :            : 
    1442                 :          3 :                 worker = find_worker_executing_work(last_pool, work);
    1443                 :            : 
    1444                 :          3 :                 if (worker && worker->current_pwq->wq == wq) {
    1445                 :            :                         pwq = worker->current_pwq;
    1446                 :            :                 } else {
    1447                 :            :                         /* meh... not running there, queue here */
    1448                 :            :                         spin_unlock(&last_pool->lock);
    1449                 :          3 :                         spin_lock(&pwq->pool->lock);
    1450                 :            :                 }
    1451                 :            :         } else {
    1452                 :          3 :                 spin_lock(&pwq->pool->lock);
    1453                 :            :         }
    1454                 :            : 
    1455                 :            :         /*
    1456                 :            :          * pwq is determined and locked.  For unbound pools, we could have
    1457                 :            :          * raced with pwq release and it could already be dead.  If its
    1458                 :            :          * refcnt is zero, repeat pwq selection.  Note that pwqs never die
    1459                 :            :          * without another pwq replacing it in the numa_pwq_tbl or while
    1460                 :            :          * work items are executing on it, so the retrying is guaranteed to
    1461                 :            :          * make forward-progress.
    1462                 :            :          */
    1463                 :          3 :         if (unlikely(!pwq->refcnt)) {
    1464                 :          0 :                 if (wq->flags & WQ_UNBOUND) {
    1465                 :          0 :                         spin_unlock(&pwq->pool->lock);
    1466                 :          0 :                         cpu_relax();
    1467                 :          0 :                         goto retry;
    1468                 :            :                 }
    1469                 :            :                 /* oops */
    1470                 :          0 :                 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
    1471                 :            :                           wq->name, cpu);
    1472                 :            :         }
    1473                 :            : 
    1474                 :            :         /* pwq determined, queue */
    1475                 :          3 :         trace_workqueue_queue_work(req_cpu, pwq, work);
    1476                 :            : 
    1477                 :          3 :         if (WARN_ON(!list_empty(&work->entry)))
    1478                 :            :                 goto out;
    1479                 :            : 
    1480                 :          3 :         pwq->nr_in_flight[pwq->work_color]++;
    1481                 :            :         work_flags = work_color_to_flags(pwq->work_color);
    1482                 :            : 
    1483                 :          3 :         if (likely(pwq->nr_active < pwq->max_active)) {
    1484                 :          3 :                 trace_workqueue_activate_work(work);
    1485                 :          3 :                 pwq->nr_active++;
    1486                 :          3 :                 worklist = &pwq->pool->worklist;
    1487                 :          3 :                 if (list_empty(worklist))
    1488                 :          3 :                         pwq->pool->watchdog_ts = jiffies;
    1489                 :            :         } else {
    1490                 :          3 :                 work_flags |= WORK_STRUCT_DELAYED;
    1491                 :          3 :                 worklist = &pwq->delayed_works;
    1492                 :            :         }
    1493                 :            : 
    1494                 :          3 :         insert_work(pwq, work, worklist, work_flags);
    1495                 :            : 
    1496                 :            : out:
    1497                 :          3 :         spin_unlock(&pwq->pool->lock);
    1498                 :            :         rcu_read_unlock();
    1499                 :            : }
    1500                 :            : 
    1501                 :            : /**
    1502                 :            :  * queue_work_on - queue work on specific cpu
    1503                 :            :  * @cpu: CPU number to execute work on
    1504                 :            :  * @wq: workqueue to use
    1505                 :            :  * @work: work to queue
    1506                 :            :  *
    1507                 :            :  * We queue the work to a specific CPU; the caller must ensure the CPU
    1508                 :            :  * can't go away.
    1509                 :            :  *
    1510                 :            :  * Return: %false if @work was already on a queue, %true otherwise.
    1511                 :            :  */
    1512                 :          3 : bool queue_work_on(int cpu, struct workqueue_struct *wq,
    1513                 :            :                    struct work_struct *work)
    1514                 :            : {
    1515                 :            :         bool ret = false;
    1516                 :            :         unsigned long flags;
    1517                 :            : 
    1518                 :          3 :         local_irq_save(flags);
    1519                 :            : 
    1520                 :          3 :         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
    1521                 :          3 :                 __queue_work(cpu, wq, work);
    1522                 :            :                 ret = true;
    1523                 :            :         }
    1524                 :            : 
    1525                 :          3 :         local_irq_restore(flags);
    1526                 :          3 :         return ret;
    1527                 :            : }
    1528                 :            : EXPORT_SYMBOL(queue_work_on);
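/*
 * Editor's usage sketch (not part of workqueue.c): queueing a work item on
 * a specific CPU with queue_work_on().  The names example_work,
 * example_work_fn and example_submit are illustrative assumptions, not
 * kernel symbols; system_wq is the standard per-cpu system workqueue.
 */
#include <linux/workqueue.h>
#include <linux/printk.h>

static void example_work_fn(struct work_struct *work)
{
        /* runs later in process context on the CPU it was queued on */
        pr_info("example work executed\n");
}

static DECLARE_WORK(example_work, example_work_fn);

static void example_submit(int cpu)
{
        /* a false return means the work item was already pending */
        if (!queue_work_on(cpu, system_wq, &example_work))
                pr_debug("example work already pending\n");
}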
    1529                 :            : 
    1530                 :            : /**
    1531                 :            :  * workqueue_select_cpu_near - Select a CPU based on NUMA node
    1532                 :            :  * @node: NUMA node ID that we want to select a CPU from
    1533                 :            :  *
    1534                 :            :  * This function will attempt to find a "random" cpu available on a given
    1535                 :            :  * node. If there are no CPUs available on the given node it will return
    1536                 :            :  * WORK_CPU_UNBOUND indicating that we should just schedule to any
    1537                 :            :  * available CPU if we need to schedule this work.
    1538                 :            :  */
    1539                 :          0 : static int workqueue_select_cpu_near(int node)
    1540                 :            : {
    1541                 :            :         int cpu;
    1542                 :            : 
    1543                 :            :         /* No point in doing this if NUMA isn't enabled for workqueues */
    1544                 :          0 :         if (!wq_numa_enabled)
    1545                 :            :                 return WORK_CPU_UNBOUND;
    1546                 :            : 
    1547                 :            :         /* Delay binding to CPU if node is not valid or online */
    1548                 :          0 :         if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
    1549                 :            :                 return WORK_CPU_UNBOUND;
    1550                 :            : 
    1551                 :            :         /* Use local node/cpu if we are already there */
    1552                 :          0 :         cpu = raw_smp_processor_id();
    1553                 :          0 :         if (node == cpu_to_node(cpu))
    1554                 :            :                 return cpu;
    1555                 :            : 
    1556                 :            :         /* Use "random" otherwise known as "first" online CPU of node */
    1557                 :          0 :         cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
    1558                 :            : 
    1559                 :            :         /* If CPU is valid return that, otherwise just defer */
    1560                 :          0 :         return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
    1561                 :            : }
    1562                 :            : 
    1563                 :            : /**
    1564                 :            :  * queue_work_node - queue work on a "random" cpu for a given NUMA node
    1565                 :            :  * @node: NUMA node that we are targeting the work for
    1566                 :            :  * @wq: workqueue to use
    1567                 :            :  * @work: work to queue
    1568                 :            :  *
    1569                 :            :  * We queue the work to a "random" CPU within a given NUMA node. The basic
    1570                 :            :  * idea here is to provide a way to somehow associate work with a given
    1571                 :            :  * NUMA node.
    1572                 :            :  *
    1573                 :            :  * This function will only make a best effort attempt at getting this onto
    1574                 :            :  * the right NUMA node. If no node is requested or the requested node is
    1575                 :            :  * offline then we just fall back to standard queue_work behavior.
    1576                 :            :  *
    1577                 :            :  * Currently the "random" CPU ends up being the first available CPU in the
    1578                 :            :  * intersection of cpu_online_mask and the cpumask of the node, unless we
    1579                 :            :  * are running on the node. In that case we just use the current CPU.
    1580                 :            :  *
    1581                 :            :  * Return: %false if @work was already on a queue, %true otherwise.
    1582                 :            :  */
    1583                 :          0 : bool queue_work_node(int node, struct workqueue_struct *wq,
    1584                 :            :                      struct work_struct *work)
    1585                 :            : {
    1586                 :            :         unsigned long flags;
    1587                 :            :         bool ret = false;
    1588                 :            : 
    1589                 :            :         /*
    1590                 :            :          * This current implementation is specific to unbound workqueues.
    1591                 :            :          * Specifically we only return the first available CPU for a given
    1592                 :            :          * node instead of cycling through individual CPUs within the node.
    1593                 :            :          *
    1594                 :            :          * If this is used with a per-cpu workqueue then the logic in
    1595                 :            :          * workqueue_select_cpu_near would need to be updated to allow for
    1596                 :            :          * some round robin type logic.
    1597                 :            :          */
    1598                 :          0 :         WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
    1599                 :            : 
    1600                 :          0 :         local_irq_save(flags);
    1601                 :            : 
    1602                 :          0 :         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
    1603                 :          0 :                 int cpu = workqueue_select_cpu_near(node);
    1604                 :            : 
    1605                 :          0 :                 __queue_work(cpu, wq, work);
    1606                 :            :                 ret = true;
    1607                 :            :         }
    1608                 :            : 
    1609                 :          0 :         local_irq_restore(flags);
    1610                 :          0 :         return ret;
    1611                 :            : }
    1612                 :            : EXPORT_SYMBOL_GPL(queue_work_node);
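/*
 * Editor's usage sketch (not part of workqueue.c): running work near a
 * NUMA node with queue_work_node().  The workqueue must be WQ_UNBOUND
 * (see the WARN_ON_ONCE above); example_wq, example_init and
 * example_queue_near are illustrative assumptions.
 */
static struct workqueue_struct *example_wq;

static bool example_init(void)
{
        /* an unbound wq is required; a per-cpu wq would trigger the WARN above */
        example_wq = alloc_workqueue("example_unbound", WQ_UNBOUND, 0);
        return example_wq != NULL;
}

static void example_queue_near(struct work_struct *work, int node)
{
        /* best effort only: falls back to any CPU if @node has none online */
        queue_work_node(node, example_wq, work);
}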
    1613                 :            : 
    1614                 :          3 : void delayed_work_timer_fn(struct timer_list *t)
    1615                 :            : {
    1616                 :            :         struct delayed_work *dwork = from_timer(dwork, t, timer);
    1617                 :            : 
    1618                 :            :         /* should have been called from irqsafe timer with irq already off */
    1619                 :          3 :         __queue_work(dwork->cpu, dwork->wq, &dwork->work);
    1620                 :          3 : }
    1621                 :            : EXPORT_SYMBOL(delayed_work_timer_fn);
    1622                 :            : 
    1623                 :          3 : static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
    1624                 :            :                                 struct delayed_work *dwork, unsigned long delay)
    1625                 :            : {
    1626                 :          3 :         struct timer_list *timer = &dwork->timer;
    1627                 :            :         struct work_struct *work = &dwork->work;
    1628                 :            : 
    1629                 :          3 :         WARN_ON_ONCE(!wq);
    1630                 :          3 :         WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
    1631                 :          3 :         WARN_ON_ONCE(timer_pending(timer));
    1632                 :          3 :         WARN_ON_ONCE(!list_empty(&work->entry));
    1633                 :            : 
    1634                 :            :         /*
    1635                 :            :          * If @delay is 0, queue @dwork->work immediately.  This is for
    1636                 :            :          * both optimization and correctness.  The earliest @timer can
    1637                 :            :          * expire is on the closest next tick, and delayed_work users depend
    1638                 :            :          * on there being no such delay when @delay is 0.
    1639                 :            :          */
    1640                 :          3 :         if (!delay) {
    1641                 :          3 :                 __queue_work(cpu, wq, &dwork->work);
    1642                 :          3 :                 return;
    1643                 :            :         }
    1644                 :            : 
    1645                 :          3 :         dwork->wq = wq;
    1646                 :          3 :         dwork->cpu = cpu;
    1647                 :          3 :         timer->expires = jiffies + delay;
    1648                 :            : 
    1649                 :          3 :         if (unlikely(cpu != WORK_CPU_UNBOUND))
    1650                 :          3 :                 add_timer_on(timer, cpu);
    1651                 :            :         else
    1652                 :          3 :                 add_timer(timer);
    1653                 :            : }
    1654                 :            : 
    1655                 :            : /**
    1656                 :            :  * queue_delayed_work_on - queue work on specific CPU after delay
    1657                 :            :  * @cpu: CPU number to execute work on
    1658                 :            :  * @wq: workqueue to use
    1659                 :            :  * @dwork: work to queue
    1660                 :            :  * @delay: number of jiffies to wait before queueing
    1661                 :            :  *
    1662                 :            :  * Return: %false if @work was already on a queue, %true otherwise.  If
    1663                 :            :  * @delay is zero and @dwork is idle, it will be scheduled for immediate
    1664                 :            :  * execution.
    1665                 :            :  */
    1666                 :          3 : bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
    1667                 :            :                            struct delayed_work *dwork, unsigned long delay)
    1668                 :            : {
    1669                 :            :         struct work_struct *work = &dwork->work;
    1670                 :            :         bool ret = false;
    1671                 :            :         unsigned long flags;
    1672                 :            : 
    1673                 :            :         /* read the comment in __queue_work() */
    1674                 :          3 :         local_irq_save(flags);
    1675                 :            : 
    1676                 :          3 :         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
    1677                 :          3 :                 __queue_delayed_work(cpu, wq, dwork, delay);
    1678                 :            :                 ret = true;
    1679                 :            :         }
    1680                 :            : 
    1681                 :          3 :         local_irq_restore(flags);
    1682                 :          3 :         return ret;
    1683                 :            : }
    1684                 :            : EXPORT_SYMBOL(queue_delayed_work_on);
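/*
 * Editor's usage sketch (not part of workqueue.c): running a handler on a
 * given CPU roughly 100ms from now.  example_dwork, example_timeout_fn and
 * example_arm are illustrative assumptions.
 */
static void example_timeout_fn(struct work_struct *work)
{
        pr_info("example delayed work fired\n");
}

static DECLARE_DELAYED_WORK(example_dwork, example_timeout_fn);

static void example_arm(int cpu)
{
        /* @delay is in jiffies; zero would queue the work immediately */
        queue_delayed_work_on(cpu, system_wq, &example_dwork,
                              msecs_to_jiffies(100));
}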
    1685                 :            : 
    1686                 :            : /**
    1687                 :            :  * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
    1688                 :            :  * @cpu: CPU number to execute work on
    1689                 :            :  * @wq: workqueue to use
    1690                 :            :  * @dwork: work to queue
    1691                 :            :  * @delay: number of jiffies to wait before queueing
    1692                 :            :  *
    1693                 :            :  * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
    1694                 :            :  * modify @dwork's timer so that it expires after @delay.  If @delay is
    1695                 :            :  * zero, @work is guaranteed to be scheduled immediately regardless of its
    1696                 :            :  * current state.
    1697                 :            :  *
    1698                 :            :  * Return: %false if @dwork was idle and queued, %true if @dwork was
    1699                 :            :  * pending and its timer was modified.
    1700                 :            :  *
    1701                 :            :  * This function is safe to call from any context including IRQ handler.
    1702                 :            :  * See try_to_grab_pending() for details.
    1703                 :            :  */
    1704                 :          3 : bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
    1705                 :            :                          struct delayed_work *dwork, unsigned long delay)
    1706                 :            : {
    1707                 :            :         unsigned long flags;
    1708                 :            :         int ret;
    1709                 :            : 
    1710                 :            :         do {
    1711                 :          3 :                 ret = try_to_grab_pending(&dwork->work, true, &flags);
    1712                 :          3 :         } while (unlikely(ret == -EAGAIN));
    1713                 :            : 
    1714                 :          3 :         if (likely(ret >= 0)) {
    1715                 :          3 :                 __queue_delayed_work(cpu, wq, dwork, delay);
    1716                 :          3 :                 local_irq_restore(flags);
    1717                 :            :         }
    1718                 :            : 
    1719                 :            :         /* -ENOENT from try_to_grab_pending() becomes %true */
    1720                 :          3 :         return ret;
    1721                 :            : }
    1722                 :            : EXPORT_SYMBOL_GPL(mod_delayed_work_on);
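/*
 * Editor's usage sketch (not part of workqueue.c): debouncing with
 * mod_delayed_work(), the WORK_CPU_UNBOUND wrapper around
 * mod_delayed_work_on().  Each call pushes execution of example_dwork
 * (the illustrative delayed_work from the sketch above) out to 50ms from
 * now, whether or not it was already pending.
 */
static void example_debounced_event(void)
{
        mod_delayed_work(system_wq, &example_dwork, msecs_to_jiffies(50));
}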
    1723                 :            : 
    1724                 :          3 : static void rcu_work_rcufn(struct rcu_head *rcu)
    1725                 :            : {
    1726                 :            :         struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
    1727                 :            : 
    1728                 :            :         /* read the comment in __queue_work() */
    1729                 :          3 :         local_irq_disable();
    1730                 :          3 :         __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
    1731                 :          3 :         local_irq_enable();
    1732                 :          3 : }
    1733                 :            : 
    1734                 :            : /**
    1735                 :            :  * queue_rcu_work - queue work after a RCU grace period
    1736                 :            :  * @wq: workqueue to use
    1737                 :            :  * @rwork: work to queue
    1738                 :            :  *
    1739                 :            :  * Return: %false if @rwork was already pending, %true otherwise.  Note
    1740                 :            :  * that a full RCU grace period is guaranteed only after a %true return.
    1741                 :            :  * While @rwork is guaranteed to be executed after a %false return, the
    1742                 :            :  * execution may happen before a full RCU grace period has passed.
    1743                 :            :  */
    1744                 :          3 : bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
    1745                 :            : {
    1746                 :            :         struct work_struct *work = &rwork->work;
    1747                 :            : 
    1748                 :          3 :         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
    1749                 :          3 :                 rwork->wq = wq;
    1750                 :          3 :                 call_rcu(&rwork->rcu, rcu_work_rcufn);
    1751                 :          3 :                 return true;
    1752                 :            :         }
    1753                 :            : 
    1754                 :            :         return false;
    1755                 :            : }
    1756                 :            : EXPORT_SYMBOL(queue_rcu_work);
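/*
 * Editor's usage sketch (not part of workqueue.c): freeing an object in
 * process context only after an RCU grace period, via queue_rcu_work().
 * struct example_obj, example_free_fn and example_defer_free are
 * illustrative assumptions; kfree() needs <linux/slab.h>.
 */
struct example_obj {
        struct rcu_work rwork;
        /* ... payload read under rcu_read_lock() by other code ... */
};

static void example_free_fn(struct work_struct *work)
{
        struct example_obj *obj =
                container_of(to_rcu_work(work), struct example_obj, rwork);

        kfree(obj);
}

static void example_defer_free(struct example_obj *obj)
{
        INIT_RCU_WORK(&obj->rwork, example_free_fn);
        /* example_free_fn() runs only after a full RCU grace period */
        queue_rcu_work(system_wq, &obj->rwork);
}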
    1757                 :            : 
    1758                 :            : /**
    1759                 :            :  * worker_enter_idle - enter idle state
    1760                 :            :  * @worker: worker which is entering idle state
    1761                 :            :  *
    1762                 :            :  * @worker is entering idle state.  Update stats and idle timer if
    1763                 :            :  * necessary.
    1764                 :            :  *
    1765                 :            :  * LOCKING:
    1766                 :            :  * spin_lock_irq(pool->lock).
    1767                 :            :  */
    1768                 :          3 : static void worker_enter_idle(struct worker *worker)
    1769                 :            : {
    1770                 :          3 :         struct worker_pool *pool = worker->pool;
    1771                 :            : 
    1772                 :          3 :         if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
    1773                 :          3 :             WARN_ON_ONCE(!list_empty(&worker->entry) &&
    1774                 :            :                          (worker->hentry.next || worker->hentry.pprev)))
    1775                 :          3 :                 return;
    1776                 :            : 
    1777                 :            :         /* can't use worker_set_flags(), also called from create_worker() */
    1778                 :          3 :         worker->flags |= WORKER_IDLE;
    1779                 :          3 :         pool->nr_idle++;
    1780                 :          3 :         worker->last_active = jiffies;
    1781                 :            : 
    1782                 :            :         /* idle_list is LIFO */
    1783                 :          3 :         list_add(&worker->entry, &pool->idle_list);
    1784                 :            : 
    1785                 :          3 :         if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
    1786                 :          3 :                 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
    1787                 :            : 
    1788                 :            :         /*
    1789                 :            :          * Sanity check nr_running.  Because unbind_workers() releases
    1790                 :            :          * pool->lock between setting %WORKER_UNBOUND and zapping
    1791                 :            :          * nr_running, the warning may trigger spuriously.  Check iff
    1792                 :            :          * unbind is not in progress.
    1793                 :            :          */
    1794                 :          3 :         WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
    1795                 :            :                      pool->nr_workers == pool->nr_idle &&
    1796                 :            :                      atomic_read(&pool->nr_running));
    1797                 :            : }
    1798                 :            : 
    1799                 :            : /**
    1800                 :            :  * worker_leave_idle - leave idle state
    1801                 :            :  * @worker: worker which is leaving idle state
    1802                 :            :  *
    1803                 :            :  * @worker is leaving idle state.  Update stats.
    1804                 :            :  *
    1805                 :            :  * LOCKING:
    1806                 :            :  * spin_lock_irq(pool->lock).
    1807                 :            :  */
    1808                 :          3 : static void worker_leave_idle(struct worker *worker)
    1809                 :            : {
    1810                 :          3 :         struct worker_pool *pool = worker->pool;
    1811                 :            : 
    1812                 :          3 :         if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
    1813                 :          3 :                 return;
    1814                 :          3 :         worker_clr_flags(worker, WORKER_IDLE);
    1815                 :          3 :         pool->nr_idle--;
    1816                 :          3 :         list_del_init(&worker->entry);
    1817                 :            : }
    1818                 :            : 
    1819                 :          3 : static struct worker *alloc_worker(int node)
    1820                 :            : {
    1821                 :            :         struct worker *worker;
    1822                 :            : 
    1823                 :          3 :         worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
    1824                 :          3 :         if (worker) {
    1825                 :          3 :                 INIT_LIST_HEAD(&worker->entry);
    1826                 :          3 :                 INIT_LIST_HEAD(&worker->scheduled);
    1827                 :          3 :                 INIT_LIST_HEAD(&worker->node);
    1828                 :            :                 /* on creation a worker is in !idle && prep state */
    1829                 :          3 :                 worker->flags = WORKER_PREP;
    1830                 :            :         }
    1831                 :          3 :         return worker;
    1832                 :            : }
    1833                 :            : 
    1834                 :            : /**
    1835                 :            :  * worker_attach_to_pool() - attach a worker to a pool
    1836                 :            :  * @worker: worker to be attached
    1837                 :            :  * @pool: the target pool
    1838                 :            :  *
    1839                 :            :  * Attach @worker to @pool.  Once attached, the %WORKER_UNBOUND flag and
    1840                 :            :  * cpu-binding of @worker are kept coordinated with the pool across
    1841                 :            :  * cpu-[un]hotplugs.
    1842                 :            :  */
    1843                 :          3 : static void worker_attach_to_pool(struct worker *worker,
    1844                 :            :                                    struct worker_pool *pool)
    1845                 :            : {
    1846                 :          3 :         mutex_lock(&wq_pool_attach_mutex);
    1847                 :            : 
    1848                 :            :         /*
    1849                 :            :          * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
    1850                 :            :          * online CPUs.  It'll be re-applied when any of the CPUs come up.
    1851                 :            :          */
    1852                 :          3 :         set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
    1853                 :            : 
    1854                 :            :         /*
    1855                 :            :          * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
    1856                 :            :          * stable across this function.  See the comments above the flag
    1857                 :            :          * definition for details.
    1858                 :            :          */
    1859                 :          3 :         if (pool->flags & POOL_DISASSOCIATED)
    1860                 :          3 :                 worker->flags |= WORKER_UNBOUND;
    1861                 :            : 
    1862                 :          3 :         list_add_tail(&worker->node, &pool->workers);
    1863                 :          3 :         worker->pool = pool;
    1864                 :            : 
    1865                 :          3 :         mutex_unlock(&wq_pool_attach_mutex);
    1866                 :          3 : }
    1867                 :            : 
    1868                 :            : /**
    1869                 :            :  * worker_detach_from_pool() - detach a worker from its pool
    1870                 :            :  * @worker: worker which is attached to its pool
    1871                 :            :  *
    1872                 :            :  * Undo the attaching which had been done in worker_attach_to_pool().  The
    1873                 :            :  * caller worker shouldn't access the pool after detaching unless it has
    1874                 :            :  * another reference to the pool.
    1875                 :            :  */
    1876                 :          1 : static void worker_detach_from_pool(struct worker *worker)
    1877                 :            : {
    1878                 :          1 :         struct worker_pool *pool = worker->pool;
    1879                 :            :         struct completion *detach_completion = NULL;
    1880                 :            : 
    1881                 :          1 :         mutex_lock(&wq_pool_attach_mutex);
    1882                 :            : 
    1883                 :            :         list_del(&worker->node);
    1884                 :          1 :         worker->pool = NULL;
    1885                 :            : 
    1886                 :          1 :         if (list_empty(&pool->workers))
    1887                 :          0 :                 detach_completion = pool->detach_completion;
    1888                 :          1 :         mutex_unlock(&wq_pool_attach_mutex);
    1889                 :            : 
    1890                 :            :         /* clear leftover flags without pool->lock after it is detached */
    1891                 :          1 :         worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
    1892                 :            : 
    1893                 :          1 :         if (detach_completion)
    1894                 :          0 :                 complete(detach_completion);
    1895                 :          1 : }
    1896                 :            : 
    1897                 :            : /**
    1898                 :            :  * create_worker - create a new workqueue worker
    1899                 :            :  * @pool: pool the new worker will belong to
    1900                 :            :  *
    1901                 :            :  * Create and start a new worker which is attached to @pool.
    1902                 :            :  *
    1903                 :            :  * CONTEXT:
    1904                 :            :  * Might sleep.  Does GFP_KERNEL allocations.
    1905                 :            :  *
    1906                 :            :  * Return:
    1907                 :            :  * Pointer to the newly created worker.
    1908                 :            :  */
    1909                 :          3 : static struct worker *create_worker(struct worker_pool *pool)
    1910                 :            : {
    1911                 :            :         struct worker *worker = NULL;
    1912                 :            :         int id = -1;
    1913                 :            :         char id_buf[16];
    1914                 :            : 
    1915                 :            :         /* ID is needed to determine kthread name */
    1916                 :          3 :         id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
    1917                 :          3 :         if (id < 0)
    1918                 :            :                 goto fail;
    1919                 :            : 
    1920                 :          3 :         worker = alloc_worker(pool->node);
    1921                 :          3 :         if (!worker)
    1922                 :            :                 goto fail;
    1923                 :            : 
    1924                 :          3 :         worker->id = id;
    1925                 :            : 
    1926                 :          3 :         if (pool->cpu >= 0)
    1927                 :          3 :                 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
    1928                 :          3 :                          pool->attrs->nice < 0  ? "H" : "");
    1929                 :            :         else
    1930                 :          3 :                 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
    1931                 :            : 
    1932                 :          3 :         worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
    1933                 :            :                                               "kworker/%s", id_buf);
    1934                 :          3 :         if (IS_ERR(worker->task))
    1935                 :            :                 goto fail;
    1936                 :            : 
    1937                 :          3 :         set_user_nice(worker->task, pool->attrs->nice);
    1938                 :          3 :         kthread_bind_mask(worker->task, pool->attrs->cpumask);
    1939                 :            : 
    1940                 :            :         /* successful, attach the worker to the pool */
    1941                 :          3 :         worker_attach_to_pool(worker, pool);
    1942                 :            : 
    1943                 :            :         /* start the newly created worker */
    1944                 :            :         spin_lock_irq(&pool->lock);
    1945                 :          3 :         worker->pool->nr_workers++;
    1946                 :          3 :         worker_enter_idle(worker);
    1947                 :          3 :         wake_up_process(worker->task);
    1948                 :            :         spin_unlock_irq(&pool->lock);
    1949                 :            : 
    1950                 :          3 :         return worker;
    1951                 :            : 
    1952                 :            : fail:
    1953                 :          3 :         if (id >= 0)
    1954                 :          0 :                 ida_simple_remove(&pool->worker_ida, id);
    1955                 :          3 :         kfree(worker);
    1956                 :          0 :         return NULL;
    1957                 :            : }
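/*
 * Editor's note (not kernel source): the id_buf built above produces the
 * familiar kernel thread names, e.g. "kworker/3:2" for worker 2 of CPU 3's
 * normal pool, "kworker/3:2H" for its high-priority pool (attrs->nice < 0)
 * and "kworker/u8:1" for worker 1 of unbound pool 8.
 */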
    1958                 :            : 
    1959                 :            : /**
    1960                 :            :  * destroy_worker - destroy a workqueue worker
    1961                 :            :  * @worker: worker to be destroyed
    1962                 :            :  *
    1963                 :            :  * Destroy @worker and adjust @pool stats accordingly.  The worker should
    1964                 :            :  * be idle.
    1965                 :            :  *
    1966                 :            :  * CONTEXT:
    1967                 :            :  * spin_lock_irq(pool->lock).
    1968                 :            :  */
    1969                 :          0 : static void destroy_worker(struct worker *worker)
    1970                 :            : {
    1971                 :          0 :         struct worker_pool *pool = worker->pool;
    1972                 :            : 
    1973                 :            :         lockdep_assert_held(&pool->lock);
    1974                 :            : 
    1975                 :            :         /* sanity check frenzy */
    1976                 :          0 :         if (WARN_ON(worker->current_work) ||
    1977                 :          0 :             WARN_ON(!list_empty(&worker->scheduled)) ||
    1978                 :          0 :             WARN_ON(!(worker->flags & WORKER_IDLE)))
    1979                 :          0 :                 return;
    1980                 :            : 
    1981                 :          0 :         pool->nr_workers--;
    1982                 :          0 :         pool->nr_idle--;
    1983                 :            : 
    1984                 :          0 :         list_del_init(&worker->entry);
    1985                 :          0 :         worker->flags |= WORKER_DIE;
    1986                 :          0 :         wake_up_process(worker->task);
    1987                 :            : }
    1988                 :            : 
    1989                 :          0 : static void idle_worker_timeout(struct timer_list *t)
    1990                 :            : {
    1991                 :            :         struct worker_pool *pool = from_timer(pool, t, idle_timer);
    1992                 :            : 
    1993                 :            :         spin_lock_irq(&pool->lock);
    1994                 :            : 
    1995                 :          0 :         while (too_many_workers(pool)) {
    1996                 :            :                 struct worker *worker;
    1997                 :            :                 unsigned long expires;
    1998                 :            : 
    1999                 :            :                 /* idle_list is kept in LIFO order, check the last one */
    2000                 :          0 :                 worker = list_entry(pool->idle_list.prev, struct worker, entry);
    2001                 :          0 :                 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
    2002                 :            : 
    2003                 :          0 :                 if (time_before(jiffies, expires)) {
    2004                 :          0 :                         mod_timer(&pool->idle_timer, expires);
    2005                 :          0 :                         break;
    2006                 :            :                 }
    2007                 :            : 
    2008                 :          0 :                 destroy_worker(worker);
    2009                 :            :         }
    2010                 :            : 
    2011                 :            :         spin_unlock_irq(&pool->lock);
    2012                 :          0 : }
    2013                 :            : 
    2014                 :          3 : static void send_mayday(struct work_struct *work)
    2015                 :            : {
    2016                 :            :         struct pool_workqueue *pwq = get_work_pwq(work);
    2017                 :          3 :         struct workqueue_struct *wq = pwq->wq;
    2018                 :            : 
    2019                 :            :         lockdep_assert_held(&wq_mayday_lock);
    2020                 :            : 
    2021                 :          3 :         if (!wq->rescuer)
    2022                 :          3 :                 return;
    2023                 :            : 
    2024                 :            :         /* mayday mayday mayday */
    2025                 :          1 :         if (list_empty(&pwq->mayday_node)) {
    2026                 :            :                 /*
    2027                 :            :                  * If @pwq is for an unbound wq, its base ref may be put at
    2028                 :            :                  * any time due to an attribute change.  Pin @pwq until the
    2029                 :            :                  * rescuer is done with it.
    2030                 :            :                  */
    2031                 :          1 :                 get_pwq(pwq);
    2032                 :          1 :                 list_add_tail(&pwq->mayday_node, &wq->maydays);
    2033                 :          1 :                 wake_up_process(wq->rescuer->task);
    2034                 :            :         }
    2035                 :            : }
    2036                 :            : 
    2037                 :          3 : static void pool_mayday_timeout(struct timer_list *t)
    2038                 :            : {
    2039                 :            :         struct worker_pool *pool = from_timer(pool, t, mayday_timer);
    2040                 :            :         struct work_struct *work;
    2041                 :            : 
    2042                 :            :         spin_lock_irq(&pool->lock);
    2043                 :            :         spin_lock(&wq_mayday_lock);         /* for wq->maydays */
    2044                 :            : 
    2045                 :          3 :         if (need_to_create_worker(pool)) {
    2046                 :            :                 /*
    2047                 :            :                  * We've been trying to create a new worker but
    2048                 :            :                  * haven't been successful.  We might be hitting an
    2049                 :            :                  * allocation deadlock.  Send distress signals to
    2050                 :            :                  * rescuers.
    2051                 :            :                  */
    2052                 :          3 :                 list_for_each_entry(work, &pool->worklist, entry)
    2053                 :          3 :                         send_mayday(work);
    2054                 :            :         }
    2055                 :            : 
    2056                 :            :         spin_unlock(&wq_mayday_lock);
    2057                 :            :         spin_unlock_irq(&pool->lock);
    2058                 :            : 
    2059                 :          3 :         mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
    2060                 :          3 : }
    2061                 :            : 
    2062                 :            : /**
    2063                 :            :  * maybe_create_worker - create a new worker if necessary
    2064                 :            :  * @pool: pool to create a new worker for
    2065                 :            :  *
    2066                 :            :  * Create a new worker for @pool if necessary.  @pool is guaranteed to
    2067                 :            :  * have at least one idle worker on return from this function.  If
    2068                 :            :  * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
    2069                 :            :  * sent to all rescuers with works scheduled on @pool to resolve
    2070                 :            :  * possible allocation deadlock.
    2071                 :            :  *
    2072                 :            :  * On return, need_to_create_worker() is guaranteed to be %false and
    2073                 :            :  * may_start_working() %true.
    2074                 :            :  *
    2075                 :            :  * LOCKING:
    2076                 :            :  * spin_lock_irq(pool->lock) which may be released and regrabbed
    2077                 :            :  * multiple times.  Does GFP_KERNEL allocations.  Called only from
    2078                 :            :  * manager.
    2079                 :            :  */
    2080                 :          3 : static void maybe_create_worker(struct worker_pool *pool)
    2081                 :            : __releases(&pool->lock)
    2082                 :            : __acquires(&pool->lock)
    2083                 :            : {
    2084                 :            : restart:
    2085                 :            :         spin_unlock_irq(&pool->lock);
    2086                 :            : 
    2087                 :            :         /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
    2088                 :          3 :         mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
    2089                 :            : 
    2090                 :            :         while (true) {
    2091                 :          3 :                 if (create_worker(pool) || !need_to_create_worker(pool))
    2092                 :            :                         break;
    2093                 :            : 
    2094                 :          0 :                 schedule_timeout_interruptible(CREATE_COOLDOWN);
    2095                 :            : 
    2096                 :          0 :                 if (!need_to_create_worker(pool))
    2097                 :            :                         break;
    2098                 :            :         }
    2099                 :            : 
    2100                 :          3 :         del_timer_sync(&pool->mayday_timer);
    2101                 :            :         spin_lock_irq(&pool->lock);
    2102                 :            :         /*
    2103                 :            :          * This is necessary even after a new worker was just successfully
    2104                 :            :          * created as @pool->lock was dropped and the new worker might have
    2105                 :            :          * already become busy.
    2106                 :            :          */
    2107                 :          3 :         if (need_to_create_worker(pool))
    2108                 :            :                 goto restart;
    2109                 :          3 : }
    2110                 :            : 
    2111                 :            : /**
    2112                 :            :  * manage_workers - manage worker pool
    2113                 :            :  * @worker: self
    2114                 :            :  *
    2115                 :            :  * Assume the manager role and manage the worker pool @worker belongs
    2116                 :            :  * to.  At any given time, there can be only zero or one manager per
    2117                 :            :  * pool.  The exclusion is handled automatically by this function.
    2118                 :            :  *
    2119                 :            :  * The caller can safely start processing works on false return.  On
    2120                 :            :  * true return, it's guaranteed that need_to_create_worker() is false
    2121                 :            :  * and may_start_working() is true.
    2122                 :            :  *
    2123                 :            :  * CONTEXT:
    2124                 :            :  * spin_lock_irq(pool->lock) which may be released and regrabbed
    2125                 :            :  * multiple times.  Does GFP_KERNEL allocations.
    2126                 :            :  *
    2127                 :            :  * Return:
    2128                 :            :  * %false if the pool doesn't need management and the caller can safely
    2129                 :            :  * start processing works, %true if management function was performed and
    2130                 :            :  * the conditions that the caller verified before calling the function may
    2131                 :            :  * no longer be true.
    2132                 :            :  */
    2133                 :          3 : static bool manage_workers(struct worker *worker)
    2134                 :            : {
    2135                 :          3 :         struct worker_pool *pool = worker->pool;
    2136                 :            : 
    2137                 :          3 :         if (pool->flags & POOL_MANAGER_ACTIVE)
    2138                 :            :                 return false;
    2139                 :            : 
    2140                 :          3 :         pool->flags |= POOL_MANAGER_ACTIVE;
    2141                 :          3 :         pool->manager = worker;
    2142                 :            : 
    2143                 :          3 :         maybe_create_worker(pool);
    2144                 :            : 
    2145                 :          3 :         pool->manager = NULL;
    2146                 :          3 :         pool->flags &= ~POOL_MANAGER_ACTIVE;
    2147                 :          3 :         wake_up(&wq_manager_wait);
    2148                 :          3 :         return true;
    2149                 :            : }
    2150                 :            : 
    2151                 :            : /**
    2152                 :            :  * process_one_work - process single work
    2153                 :            :  * @worker: self
    2154                 :            :  * @work: work to process
    2155                 :            :  *
    2156                 :            :  * Process @work.  This function contains all the logic necessary to
    2157                 :            :  * process a single work item including synchronization against and
    2158                 :            :  * interaction with other workers on the same cpu, queueing and
    2159                 :            :  * flushing.  As long as context requirement is met, any worker can
    2160                 :            :  * call this function to process a work.
    2161                 :            :  *
    2162                 :            :  * CONTEXT:
    2163                 :            :  * spin_lock_irq(pool->lock) which is released and regrabbed.
    2164                 :            :  */
    2165                 :          3 : static void process_one_work(struct worker *worker, struct work_struct *work)
    2166                 :            : __releases(&pool->lock)
    2167                 :            : __acquires(&pool->lock)
    2168                 :            : {
    2169                 :            :         struct pool_workqueue *pwq = get_work_pwq(work);
    2170                 :          3 :         struct worker_pool *pool = worker->pool;
    2171                 :          3 :         bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
    2172                 :            :         int work_color;
    2173                 :            :         struct worker *collision;
    2174                 :            : #ifdef CONFIG_LOCKDEP
    2175                 :            :         /*
    2176                 :            :          * It is permissible to free the struct work_struct from
    2177                 :            :          * inside the function that is called from it; we need to take
    2178                 :            :          * this into account for lockdep too.  To avoid bogus "held
    2179                 :            :          * lock freed" warnings as well as problems when looking into
    2180                 :            :          * work->lockdep_map, make a copy and use that here.
    2181                 :            :          */
    2182                 :            :         struct lockdep_map lockdep_map;
    2183                 :            : 
    2184                 :            :         lockdep_copy_map(&lockdep_map, &work->lockdep_map);
    2185                 :            : #endif
    2186                 :            :         /* ensure we're on the correct CPU */
    2187                 :          3 :         WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
    2188                 :            :                      raw_smp_processor_id() != pool->cpu);
    2189                 :            : 
    2190                 :            :         /*
    2191                 :            :          * A single work shouldn't be executed concurrently by
    2192                 :            :          * multiple workers on a single cpu.  Check whether anyone is
    2193                 :            :          * already processing the work.  If so, defer the work to the
    2194                 :            :          * currently executing one.
    2195                 :            :          */
    2196                 :          3 :         collision = find_worker_executing_work(pool, work);
    2197                 :          3 :         if (unlikely(collision)) {
    2198                 :          3 :                 move_linked_works(work, &collision->scheduled, NULL);
    2199                 :          3 :                 return;
    2200                 :            :         }
    2201                 :            : 
    2202                 :            :         /* claim and dequeue */
    2203                 :            :         debug_work_deactivate(work);
    2204                 :          3 :         hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
    2205                 :          3 :         worker->current_work = work;
    2206                 :          3 :         worker->current_func = work->func;
    2207                 :          3 :         worker->current_pwq = pwq;
    2208                 :            :         work_color = get_work_color(work);
    2209                 :            : 
    2210                 :            :         /*
    2211                 :            :          * Record wq name for cmdline and debug reporting, may get
    2212                 :            :          * overridden through set_worker_desc().
    2213                 :            :          */
    2214                 :          3 :         strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
    2215                 :            : 
    2216                 :          3 :         list_del_init(&work->entry);
    2217                 :            : 
    2218                 :            :         /*
    2219                 :            :          * CPU intensive works don't participate in concurrency management.
    2220                 :            :          * They're the scheduler's responsibility.  This takes @worker out
    2221                 :            :          * of concurrency management and the next code block will chain
    2222                 :            :          * execution of the pending work items.
    2223                 :            :          */
    2224                 :          3 :         if (unlikely(cpu_intensive))
    2225                 :          0 :                 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
    2226                 :            : 
    2227                 :            :         /*
    2228                 :            :          * Wake up another worker if necessary.  The condition is always
    2229                 :            :          * false for normal per-cpu workers since nr_running would always
    2230                 :            :          * be >= 1 at this point.  This is used to chain execution of the
    2231                 :            :          * pending work items for WORKER_NOT_RUNNING workers such as the
    2232                 :            :          * UNBOUND and CPU_INTENSIVE ones.
    2233                 :            :          */
    2234                 :          3 :         if (need_more_worker(pool))
    2235                 :          3 :                 wake_up_worker(pool);
    2236                 :            : 
    2237                 :            :         /*
    2238                 :            :          * Record the last pool and clear PENDING which should be the last
    2239                 :            :          * update to @work.  Also, do this inside @pool->lock so that
    2240                 :            :          * PENDING and queued state changes happen together while IRQ is
    2241                 :            :          * disabled.
    2242                 :            :          */
    2243                 :          3 :         set_work_pool_and_clear_pending(work, pool->id);
    2244                 :            : 
    2245                 :            :         spin_unlock_irq(&pool->lock);
    2246                 :            : 
    2247                 :            :         lock_map_acquire(&pwq->wq->lockdep_map);
    2248                 :            :         lock_map_acquire(&lockdep_map);
    2249                 :            :         /*
    2250                 :            :          * Strictly speaking we should mark the invariant state without holding
    2251                 :            :          * any locks, that is, before these two lock_map_acquire()'s.
    2252                 :            :          *
    2253                 :            :          * However, that would result in:
    2254                 :            :          *
    2255                 :            :          *   A(W1)
    2256                 :            :          *   WFC(C)
    2257                 :            :          *              A(W1)
    2258                 :            :          *              C(C)
    2259                 :            :          *
    2260                 :            :          * Which would create W1->C->W1 dependencies, even though there is no
    2261                 :            :          * actual deadlock possible. There are two solutions, using a
    2262                 :            :          * read-recursive acquire on the work(queue) 'locks', but this will then
    2263                 :            :          * hit the lockdep limitation on recursive locks, or simply discard
    2264                 :            :          * these locks.
    2265                 :            :          *
    2266                 :            :          * AFAICT there is no possible deadlock scenario between the
    2267                 :            :          * flush_work() and complete() primitives (except for single-threaded
    2268                 :            :          * workqueues), so hiding them isn't a problem.
    2269                 :            :          */
    2270                 :            :         lockdep_invariant_state(true);
    2271                 :          3 :         trace_workqueue_execute_start(work);
    2272                 :          3 :         worker->current_func(work);
    2273                 :            :         /*
    2274                 :            :          * While we must be careful to not use "work" after this, the trace
    2275                 :            :          * point will only record its address.
    2276                 :            :          */
    2277                 :          3 :         trace_workqueue_execute_end(work);
    2278                 :            :         lock_map_release(&lockdep_map);
    2279                 :            :         lock_map_release(&pwq->wq->lockdep_map);
    2280                 :            : 
    2281                 :          3 :         if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
    2282                 :          0 :                 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
    2283                 :            :                        "     last function: %ps\n",
    2284                 :            :                        current->comm, preempt_count(), task_pid_nr(current),
    2285                 :            :                        worker->current_func);
    2286                 :            :                 debug_show_held_locks(current);
    2287                 :          0 :                 dump_stack();
    2288                 :            :         }
    2289                 :            : 
    2290                 :            :         /*
    2291                 :            :          * The following prevents a kworker from hogging CPU on !PREEMPT
    2292                 :            :          * kernels, where a requeueing work item waiting for something to
     2293                 :            :          * happen could deadlock with stop_machine, as such a work item could
    2294                 :            :          * indefinitely requeue itself while all other CPUs are trapped in
    2295                 :            :          * stop_machine. At the same time, report a quiescent RCU state so
    2296                 :            :          * the same condition doesn't freeze RCU.
    2297                 :            :          */
    2298                 :          3 :         cond_resched();
    2299                 :            : 
    2300                 :            :         spin_lock_irq(&pool->lock);
    2301                 :            : 
    2302                 :            :         /* clear cpu intensive status */
    2303                 :          3 :         if (unlikely(cpu_intensive))
    2304                 :          0 :                 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
    2305                 :            : 
    2306                 :            :         /* tag the worker for identification in schedule() */
    2307                 :          3 :         worker->last_func = worker->current_func;
    2308                 :            : 
    2309                 :            :         /* we're done with it, release */
    2310                 :            :         hash_del(&worker->hentry);
    2311                 :          3 :         worker->current_work = NULL;
    2312                 :          3 :         worker->current_func = NULL;
    2313                 :          3 :         worker->current_pwq = NULL;
    2314                 :          3 :         pwq_dec_nr_in_flight(pwq, work_color);
    2315                 :            : }
    2316                 :            : 
    2317                 :            : /**
    2318                 :            :  * process_scheduled_works - process scheduled works
    2319                 :            :  * @worker: self
    2320                 :            :  *
    2321                 :            :  * Process all scheduled works.  Please note that the scheduled list
    2322                 :            :  * may change while processing a work, so this function repeatedly
    2323                 :            :  * fetches a work from the top and executes it.
    2324                 :            :  *
    2325                 :            :  * CONTEXT:
    2326                 :            :  * spin_lock_irq(pool->lock) which may be released and regrabbed
    2327                 :            :  * multiple times.
    2328                 :            :  */
    2329                 :            : static void process_scheduled_works(struct worker *worker)
    2330                 :            : {
    2331                 :          3 :         while (!list_empty(&worker->scheduled)) {
    2332                 :          3 :                 struct work_struct *work = list_first_entry(&worker->scheduled,
    2333                 :            :                                                 struct work_struct, entry);
    2334                 :          3 :                 process_one_work(worker, work);
    2335                 :            :         }
    2336                 :            : }
    2337                 :            : 
    2338                 :          3 : static void set_pf_worker(bool val)
    2339                 :            : {
    2340                 :          3 :         mutex_lock(&wq_pool_attach_mutex);
    2341                 :          3 :         if (val)
    2342                 :          3 :                 current->flags |= PF_WQ_WORKER;
    2343                 :            :         else
    2344                 :          2 :                 current->flags &= ~PF_WQ_WORKER;
    2345                 :          3 :         mutex_unlock(&wq_pool_attach_mutex);
    2346                 :          3 : }
    2347                 :            : 
    2348                 :            : /**
    2349                 :            :  * worker_thread - the worker thread function
    2350                 :            :  * @__worker: self
    2351                 :            :  *
    2352                 :            :  * The worker thread function.  All workers belong to a worker_pool -
    2353                 :            :  * either a per-cpu one or dynamic unbound one.  These workers process all
    2354                 :            :  * work items regardless of their specific target workqueue.  The only
    2355                 :            :  * exception is work items which belong to workqueues with a rescuer which
    2356                 :            :  * will be explained in rescuer_thread().
    2357                 :            :  *
    2358                 :            :  * Return: 0
    2359                 :            :  */
    2360                 :          3 : static int worker_thread(void *__worker)
    2361                 :            : {
    2362                 :            :         struct worker *worker = __worker;
    2363                 :          3 :         struct worker_pool *pool = worker->pool;
    2364                 :            : 
    2365                 :            :         /* tell the scheduler that this is a workqueue worker */
    2366                 :          3 :         set_pf_worker(true);
    2367                 :            : woke_up:
    2368                 :            :         spin_lock_irq(&pool->lock);
    2369                 :            : 
    2370                 :            :         /* am I supposed to die? */
    2371                 :          3 :         if (unlikely(worker->flags & WORKER_DIE)) {
    2372                 :            :                 spin_unlock_irq(&pool->lock);
    2373                 :          0 :                 WARN_ON_ONCE(!list_empty(&worker->entry));
    2374                 :          0 :                 set_pf_worker(false);
    2375                 :            : 
    2376                 :          0 :                 set_task_comm(worker->task, "kworker/dying");
    2377                 :          0 :                 ida_simple_remove(&pool->worker_ida, worker->id);
    2378                 :          0 :                 worker_detach_from_pool(worker);
    2379                 :          0 :                 kfree(worker);
    2380                 :          0 :                 return 0;
    2381                 :            :         }
    2382                 :            : 
    2383                 :          3 :         worker_leave_idle(worker);
    2384                 :            : recheck:
    2385                 :            :         /* no more worker necessary? */
    2386                 :          3 :         if (!need_more_worker(pool))
    2387                 :            :                 goto sleep;
    2388                 :            : 
    2389                 :            :         /* do we need to manage? */
    2390                 :          3 :         if (unlikely(!may_start_working(pool)) && manage_workers(worker))
    2391                 :            :                 goto recheck;
    2392                 :            : 
    2393                 :            :         /*
    2394                 :            :          * ->scheduled list can only be filled while a worker is
    2395                 :            :          * preparing to process a work or actually processing it.
    2396                 :            :          * Make sure nobody diddled with it while I was sleeping.
    2397                 :            :          */
    2398                 :          3 :         WARN_ON_ONCE(!list_empty(&worker->scheduled));
    2399                 :            : 
    2400                 :            :         /*
    2401                 :            :          * Finish PREP stage.  We're guaranteed to have at least one idle
    2402                 :            :          * worker or that someone else has already assumed the manager
    2403                 :            :          * role.  This is where @worker starts participating in concurrency
    2404                 :            :          * management if applicable and concurrency management is restored
    2405                 :            :          * after being rebound.  See rebind_workers() for details.
    2406                 :            :          */
    2407                 :          3 :         worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
    2408                 :            : 
    2409                 :            :         do {
    2410                 :            :                 struct work_struct *work =
    2411                 :          3 :                         list_first_entry(&pool->worklist,
    2412                 :            :                                          struct work_struct, entry);
    2413                 :            : 
    2414                 :          3 :                 pool->watchdog_ts = jiffies;
    2415                 :            : 
    2416                 :          3 :                 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
    2417                 :            :                         /* optimization path, not strictly necessary */
    2418                 :          3 :                         process_one_work(worker, work);
    2419                 :          3 :                         if (unlikely(!list_empty(&worker->scheduled)))
    2420                 :            :                                 process_scheduled_works(worker);
    2421                 :            :                 } else {
    2422                 :          3 :                         move_linked_works(work, &worker->scheduled, NULL);
    2423                 :            :                         process_scheduled_works(worker);
    2424                 :            :                 }
    2425                 :          3 :         } while (keep_working(pool));
    2426                 :            : 
    2427                 :          3 :         worker_set_flags(worker, WORKER_PREP);
    2428                 :            : sleep:
    2429                 :            :         /*
    2430                 :            :          * pool->lock is held and there's no work to process and no need to
    2431                 :            :          * manage, sleep.  Workers are woken up only while holding
    2432                 :            :          * pool->lock or from local cpu, so setting the current state
    2433                 :            :          * before releasing pool->lock is enough to prevent losing any
    2434                 :            :          * event.
    2435                 :            :          */
    2436                 :          3 :         worker_enter_idle(worker);
    2437                 :          3 :         __set_current_state(TASK_IDLE);
    2438                 :            :         spin_unlock_irq(&pool->lock);
    2439                 :          3 :         schedule();
    2440                 :          3 :         goto woke_up;
    2441                 :            : }
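
/*
 * Editorial sketch, not part of the original file: the producer side that
 * the worker loop above services.  A driver-style caller declares a work
 * item and queues it on the system workqueue; an idle pool worker running
 * worker_thread() is woken up and executes it via process_one_work().
 * All example_* identifiers are hypothetical.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

static void example_fn(struct work_struct *work)
{
	pr_info("example work ran in a kworker's process context\n");
}

static DECLARE_WORK(example_work, example_fn);

static void example_producer(void)
{
	/* queue on the per-cpu system_wq; no-op if already pending */
	schedule_work(&example_work);
}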
    2442                 :            : 
    2443                 :            : /**
    2444                 :            :  * rescuer_thread - the rescuer thread function
    2445                 :            :  * @__rescuer: self
    2446                 :            :  *
    2447                 :            :  * Workqueue rescuer thread function.  There's one rescuer for each
    2448                 :            :  * workqueue which has WQ_MEM_RECLAIM set.
    2449                 :            :  *
    2450                 :            :  * Regular work processing on a pool may block trying to create a new
     2451                 :            :  * worker, which uses a GFP_KERNEL allocation that has a slight chance
     2452                 :            :  * of developing into a deadlock if some work items currently on the
     2453                 :            :  * same queue need to be processed to satisfy that allocation.  This is
     2454                 :            :  * the problem the rescuer solves.
    2455                 :            :  *
     2456                 :            :  * When such a condition is possible, the pool summons the rescuers of
     2457                 :            :  * all workqueues which have work items queued on the pool and lets them
     2458                 :            :  * process those items so that forward progress can be guaranteed.
    2459                 :            :  *
    2460                 :            :  * This should happen rarely.
    2461                 :            :  *
    2462                 :            :  * Return: 0
    2463                 :            :  */
    2464                 :          3 : static int rescuer_thread(void *__rescuer)
    2465                 :            : {
    2466                 :            :         struct worker *rescuer = __rescuer;
    2467                 :          3 :         struct workqueue_struct *wq = rescuer->rescue_wq;
    2468                 :          3 :         struct list_head *scheduled = &rescuer->scheduled;
    2469                 :            :         bool should_stop;
    2470                 :            : 
    2471                 :          3 :         set_user_nice(current, RESCUER_NICE_LEVEL);
    2472                 :            : 
    2473                 :            :         /*
    2474                 :            :          * Mark rescuer as worker too.  As WORKER_PREP is never cleared, it
    2475                 :            :          * doesn't participate in concurrency management.
    2476                 :            :          */
    2477                 :          3 :         set_pf_worker(true);
    2478                 :            : repeat:
    2479                 :          3 :         set_current_state(TASK_IDLE);
    2480                 :            : 
    2481                 :            :         /*
    2482                 :            :          * By the time the rescuer is requested to stop, the workqueue
    2483                 :            :          * shouldn't have any work pending, but @wq->maydays may still have
     2484                 :            :  * pwq(s) queued.  This can happen when non-rescuer workers consume
     2485                 :            :  * all the work items before the rescuer gets to them.  Go through
    2486                 :            :          * @wq->maydays processing before acting on should_stop so that the
    2487                 :            :          * list is always empty on exit.
    2488                 :            :          */
    2489                 :          3 :         should_stop = kthread_should_stop();
    2490                 :            : 
    2491                 :            :         /* see whether any pwq is asking for help */
    2492                 :            :         spin_lock_irq(&wq_mayday_lock);
    2493                 :            : 
    2494                 :          3 :         while (!list_empty(&wq->maydays)) {
    2495                 :          1 :                 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
    2496                 :            :                                         struct pool_workqueue, mayday_node);
    2497                 :          1 :                 struct worker_pool *pool = pwq->pool;
    2498                 :            :                 struct work_struct *work, *n;
    2499                 :            :                 bool first = true;
    2500                 :            : 
    2501                 :          1 :                 __set_current_state(TASK_RUNNING);
    2502                 :          1 :                 list_del_init(&pwq->mayday_node);
    2503                 :            : 
    2504                 :            :                 spin_unlock_irq(&wq_mayday_lock);
    2505                 :            : 
    2506                 :          1 :                 worker_attach_to_pool(rescuer, pool);
    2507                 :            : 
    2508                 :            :                 spin_lock_irq(&pool->lock);
    2509                 :            : 
    2510                 :            :                 /*
    2511                 :            :                  * Slurp in all works issued via this workqueue and
    2512                 :            :                  * process'em.
    2513                 :            :                  */
    2514                 :          1 :                 WARN_ON_ONCE(!list_empty(scheduled));
    2515                 :          1 :                 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
    2516                 :          1 :                         if (get_work_pwq(work) == pwq) {
    2517                 :          1 :                                 if (first)
    2518                 :          1 :                                         pool->watchdog_ts = jiffies;
    2519                 :          1 :                                 move_linked_works(work, scheduled, &n);
    2520                 :            :                         }
    2521                 :            :                         first = false;
    2522                 :            :                 }
    2523                 :            : 
    2524                 :          1 :                 if (!list_empty(scheduled)) {
    2525                 :            :                         process_scheduled_works(rescuer);
    2526                 :            : 
    2527                 :            :                         /*
    2528                 :            :                          * The above execution of rescued work items could
    2529                 :            :                          * have created more to rescue through
    2530                 :            :                          * pwq_activate_first_delayed() or chained
     2531                 :            :                          * queueing.  Let's put @pwq back on the mayday list
     2532                 :            :                          * so that such back-to-back work items, which may be
     2533                 :            :                          * relieving memory pressure, don't incur a
     2534                 :            :                          * MAYDAY_INTERVAL delay in between.
    2535                 :            :                          */
    2536                 :          1 :                         if (need_to_create_worker(pool)) {
    2537                 :            :                                 spin_lock(&wq_mayday_lock);
    2538                 :            :                                 /*
    2539                 :            :                                  * Queue iff we aren't racing destruction
    2540                 :            :                                  * and somebody else hasn't queued it already.
    2541                 :            :                                  */
    2542                 :          1 :                                 if (wq->rescuer && list_empty(&pwq->mayday_node)) {
    2543                 :          1 :                                         get_pwq(pwq);
    2544                 :            :                                         list_add_tail(&pwq->mayday_node, &wq->maydays);
    2545                 :            :                                 }
    2546                 :            :                                 spin_unlock(&wq_mayday_lock);
    2547                 :            :                         }
    2548                 :            :                 }
    2549                 :            : 
    2550                 :            :                 /*
    2551                 :            :                  * Put the reference grabbed by send_mayday().  @pool won't
    2552                 :            :                  * go away while we're still attached to it.
    2553                 :            :                  */
    2554                 :          1 :                 put_pwq(pwq);
    2555                 :            : 
    2556                 :            :                 /*
    2557                 :            :                  * Leave this pool.  If need_more_worker() is %true, notify a
     2558                 :            :                  * regular worker; otherwise, we would end up with 0
     2559                 :            :                  * concurrency and stall execution.
    2560                 :            :                  */
    2561                 :          1 :                 if (need_more_worker(pool))
    2562                 :          1 :                         wake_up_worker(pool);
    2563                 :            : 
    2564                 :            :                 spin_unlock_irq(&pool->lock);
    2565                 :            : 
    2566                 :          1 :                 worker_detach_from_pool(rescuer);
    2567                 :            : 
    2568                 :            :                 spin_lock_irq(&wq_mayday_lock);
    2569                 :            :         }
    2570                 :            : 
    2571                 :            :         spin_unlock_irq(&wq_mayday_lock);
    2572                 :            : 
    2573                 :          3 :         if (should_stop) {
    2574                 :          2 :                 __set_current_state(TASK_RUNNING);
    2575                 :          2 :                 set_pf_worker(false);
    2576                 :          2 :                 return 0;
    2577                 :            :         }
    2578                 :            : 
    2579                 :            :         /* rescuers should never participate in concurrency management */
    2580                 :          3 :         WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
    2581                 :          3 :         schedule();
    2582                 :          3 :         goto repeat;
    2583                 :            : }
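
/*
 * Editorial sketch, not part of the original file: a rescuer_thread() only
 * exists for workqueues created with WQ_MEM_RECLAIM.  Users on memory
 * reclaim paths allocate their workqueue with that flag so that forward
 * progress stays guaranteed even when new kworkers can't be created.  The
 * example_* identifiers are hypothetical.
 */
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_reclaim_wq;

static int example_reclaim_setup(void)
{
	/* a dedicated rescuer kthread is created for this workqueue */
	example_reclaim_wq = alloc_workqueue("example_reclaim",
					     WQ_MEM_RECLAIM, 0);
	if (!example_reclaim_wq)
		return -ENOMEM;
	return 0;
}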
    2584                 :            : 
    2585                 :            : /**
    2586                 :            :  * check_flush_dependency - check for flush dependency sanity
    2587                 :            :  * @target_wq: workqueue being flushed
    2588                 :            :  * @target_work: work item being flushed (NULL for workqueue flushes)
    2589                 :            :  *
    2590                 :            :  * %current is trying to flush the whole @target_wq or @target_work on it.
    2591                 :            :  * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
    2592                 :            :  * reclaiming memory or running on a workqueue which doesn't have
    2593                 :            :  * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
    2594                 :            :  * a deadlock.
    2595                 :            :  */
    2596                 :          3 : static void check_flush_dependency(struct workqueue_struct *target_wq,
    2597                 :            :                                    struct work_struct *target_work)
    2598                 :            : {
    2599                 :          3 :         work_func_t target_func = target_work ? target_work->func : NULL;
    2600                 :            :         struct worker *worker;
    2601                 :            : 
    2602                 :          3 :         if (target_wq->flags & WQ_MEM_RECLAIM)
    2603                 :          3 :                 return;
    2604                 :            : 
    2605                 :          3 :         worker = current_wq_worker();
    2606                 :            : 
    2607                 :          3 :         WARN_ONCE(current->flags & PF_MEMALLOC,
    2608                 :            :                   "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
    2609                 :            :                   current->pid, current->comm, target_wq->name, target_func);
    2610                 :          3 :         WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
    2611                 :            :                               (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
    2612                 :            :                   "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
    2613                 :            :                   worker->current_pwq->wq->name, worker->current_func,
    2614                 :            :                   target_wq->name, target_func);
    2615                 :            : }
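
/*
 * Editorial sketch, not part of the original file: the kind of dependency
 * check_flush_dependency() complains about.  If the function below were
 * queued on a WQ_MEM_RECLAIM workqueue, flushing a workqueue that lacks
 * WQ_MEM_RECLAIM from inside it could stall under memory pressure and
 * would trigger the WARN_ONCE() above.  The example_* identifiers are
 * hypothetical.
 */
#include <linux/workqueue.h>

/* assumed to be allocated elsewhere without WQ_MEM_RECLAIM */
static struct workqueue_struct *example_plain_wq;

static void example_reclaim_work_fn(struct work_struct *work)
{
	/* bad: a reclaim-guaranteed work item waiting on a non-guaranteed wq */
	flush_workqueue(example_plain_wq);
}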
    2616                 :            : 
    2617                 :            : struct wq_barrier {
    2618                 :            :         struct work_struct      work;
    2619                 :            :         struct completion       done;
    2620                 :            :         struct task_struct      *task;  /* purely informational */
    2621                 :            : };
    2622                 :            : 
    2623                 :          3 : static void wq_barrier_func(struct work_struct *work)
    2624                 :            : {
    2625                 :            :         struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
    2626                 :          3 :         complete(&barr->done);
    2627                 :          3 : }
    2628                 :            : 
    2629                 :            : /**
    2630                 :            :  * insert_wq_barrier - insert a barrier work
    2631                 :            :  * @pwq: pwq to insert barrier into
    2632                 :            :  * @barr: wq_barrier to insert
    2633                 :            :  * @target: target work to attach @barr to
    2634                 :            :  * @worker: worker currently executing @target, NULL if @target is not executing
    2635                 :            :  *
    2636                 :            :  * @barr is linked to @target such that @barr is completed only after
    2637                 :            :  * @target finishes execution.  Please note that the ordering
    2638                 :            :  * guarantee is observed only with respect to @target and on the local
    2639                 :            :  * cpu.
    2640                 :            :  *
     2641                 :            :  * Currently, a queued barrier can't be canceled.  This is because
     2642                 :            :  * try_to_grab_pending() can't determine whether the work to be
     2643                 :            :  * grabbed is at the head of the queue, and thus can't clear the
     2644                 :            :  * LINKED flag of the previous work, while there must be a valid next
     2645                 :            :  * work after a work with the LINKED flag set.
    2646                 :            :  *
    2647                 :            :  * Note that when @worker is non-NULL, @target may be modified
    2648                 :            :  * underneath us, so we can't reliably determine pwq from @target.
    2649                 :            :  *
    2650                 :            :  * CONTEXT:
    2651                 :            :  * spin_lock_irq(pool->lock).
    2652                 :            :  */
    2653                 :          3 : static void insert_wq_barrier(struct pool_workqueue *pwq,
    2654                 :            :                               struct wq_barrier *barr,
    2655                 :            :                               struct work_struct *target, struct worker *worker)
    2656                 :            : {
    2657                 :            :         struct list_head *head;
    2658                 :            :         unsigned int linked = 0;
    2659                 :            : 
    2660                 :            :         /*
    2661                 :            :          * debugobject calls are safe here even with pool->lock locked
    2662                 :            :          * as we know for sure that this will not trigger any of the
    2663                 :            :          * checks and call back into the fixup functions where we
    2664                 :            :          * might deadlock.
    2665                 :            :          */
    2666                 :          3 :         INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
    2667                 :            :         __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
    2668                 :            : 
    2669                 :            :         init_completion_map(&barr->done, &target->lockdep_map);
    2670                 :            : 
    2671                 :          3 :         barr->task = current;
    2672                 :            : 
    2673                 :            :         /*
    2674                 :            :          * If @target is currently being executed, schedule the
    2675                 :            :          * barrier to the worker; otherwise, put it after @target.
    2676                 :            :          */
    2677                 :          3 :         if (worker)
    2678                 :          3 :                 head = worker->scheduled.next;
    2679                 :            :         else {
    2680                 :            :                 unsigned long *bits = work_data_bits(target);
    2681                 :            : 
    2682                 :          3 :                 head = target->entry.next;
    2683                 :            :                 /* there can already be other linked works, inherit and set */
    2684                 :          3 :                 linked = *bits & WORK_STRUCT_LINKED;
    2685                 :            :                 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
    2686                 :            :         }
    2687                 :            : 
    2688                 :            :         debug_work_activate(&barr->work);
    2689                 :          3 :         insert_work(pwq, &barr->work, head,
    2690                 :            :                     work_color_to_flags(WORK_NO_COLOR) | linked);
    2691                 :          3 : }
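
/*
 * Editorial sketch, not part of the original file: the barrier pattern in
 * isolation.  __flush_work() below builds exactly this shape with struct
 * wq_barrier, except that insert_wq_barrier() places the barrier directly
 * behind the target work (or on the executing worker's ->scheduled list)
 * rather than at the tail of the queue.  The example_* identifiers are
 * hypothetical.
 */
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct example_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void example_barrier_fn(struct work_struct *work)
{
	struct example_barrier *b = container_of(work, struct example_barrier,
						 work);

	complete(&b->done);
}

static void example_wait_for_barrier(struct workqueue_struct *wq)
{
	struct example_barrier b;

	INIT_WORK_ONSTACK(&b.work, example_barrier_fn);
	init_completion(&b.done);
	queue_work(wq, &b.work);	/* barrier work only completes b.done */
	wait_for_completion(&b.done);	/* sleep until the barrier has run */
	destroy_work_on_stack(&b.work);
}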
    2692                 :            : 
    2693                 :            : /**
    2694                 :            :  * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
    2695                 :            :  * @wq: workqueue being flushed
    2696                 :            :  * @flush_color: new flush color, < 0 for no-op
    2697                 :            :  * @work_color: new work color, < 0 for no-op
    2698                 :            :  *
    2699                 :            :  * Prepare pwqs for workqueue flushing.
    2700                 :            :  *
    2701                 :            :  * If @flush_color is non-negative, flush_color on all pwqs should be
    2702                 :            :  * -1.  If no pwq has in-flight commands at the specified color, all
    2703                 :            :  * pwq->flush_color's stay at -1 and %false is returned.  If any pwq
     2704                 :            :  * has in-flight commands, its pwq->flush_color is set to
    2705                 :            :  * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
    2706                 :            :  * wakeup logic is armed and %true is returned.
    2707                 :            :  *
    2708                 :            :  * The caller should have initialized @wq->first_flusher prior to
    2709                 :            :  * calling this function with non-negative @flush_color.  If
    2710                 :            :  * @flush_color is negative, no flush color update is done and %false
    2711                 :            :  * is returned.
    2712                 :            :  *
    2713                 :            :  * If @work_color is non-negative, all pwqs should have the same
     2714                 :            :  * work_color, which is the one immediately preceding @work_color,
     2715                 :            :  * and all will be advanced to @work_color.
    2716                 :            :  *
    2717                 :            :  * CONTEXT:
    2718                 :            :  * mutex_lock(wq->mutex).
    2719                 :            :  *
    2720                 :            :  * Return:
    2721                 :            :  * %true if @flush_color >= 0 and there's something to flush.  %false
    2722                 :            :  * otherwise.
    2723                 :            :  */
    2724                 :          3 : static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
    2725                 :            :                                       int flush_color, int work_color)
    2726                 :            : {
    2727                 :            :         bool wait = false;
    2728                 :            :         struct pool_workqueue *pwq;
    2729                 :            : 
    2730                 :          3 :         if (flush_color >= 0) {
    2731                 :          3 :                 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
    2732                 :            :                 atomic_set(&wq->nr_pwqs_to_flush, 1);
    2733                 :            :         }
    2734                 :            : 
    2735                 :          3 :         for_each_pwq(pwq, wq) {
    2736                 :          3 :                 struct worker_pool *pool = pwq->pool;
    2737                 :            : 
    2738                 :            :                 spin_lock_irq(&pool->lock);
    2739                 :            : 
    2740                 :          3 :                 if (flush_color >= 0) {
    2741                 :          3 :                         WARN_ON_ONCE(pwq->flush_color != -1);
    2742                 :            : 
    2743                 :          3 :                         if (pwq->nr_in_flight[flush_color]) {
    2744                 :          0 :                                 pwq->flush_color = flush_color;
    2745                 :          0 :                                 atomic_inc(&wq->nr_pwqs_to_flush);
    2746                 :            :                                 wait = true;
    2747                 :            :                         }
    2748                 :            :                 }
    2749                 :            : 
    2750                 :          3 :                 if (work_color >= 0) {
    2751                 :          3 :                         WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
    2752                 :          3 :                         pwq->work_color = work_color;
    2753                 :            :                 }
    2754                 :            : 
    2755                 :            :                 spin_unlock_irq(&pool->lock);
    2756                 :            :         }
    2757                 :            : 
    2758                 :          3 :         if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
    2759                 :          3 :                 complete(&wq->first_flusher->done);
    2760                 :            : 
    2761                 :          3 :         return wait;
    2762                 :            : }
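
/*
 * Editorial worked example, not part of the original file, illustrating the
 * flush color machinery above.  Suppose wq->work_color is 2 when
 * flush_workqueue() is called: the flusher claims flush_color 2 and
 * work_color is advanced to 3, so work items queued from now on carry
 * color 3 and are not waited for.  Each pwq that still has nr_in_flight[2]
 * items bumps wq->nr_pwqs_to_flush and sets its pwq->flush_color to 2; the
 * bias added by atomic_set(..., 1) is dropped at the end of
 * flush_workqueue_prep_pwqs(), so the first flusher is completed only once
 * every such pwq has retired its color-2 items via pwq_dec_nr_in_flight().
 */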
    2763                 :            : 
    2764                 :            : /**
    2765                 :            :  * flush_workqueue - ensure that any scheduled work has run to completion.
    2766                 :            :  * @wq: workqueue to flush
    2767                 :            :  *
    2768                 :            :  * This function sleeps until all work items which were queued on entry
    2769                 :            :  * have finished execution, but it is not livelocked by new incoming ones.
    2770                 :            :  */
    2771                 :          3 : void flush_workqueue(struct workqueue_struct *wq)
    2772                 :            : {
    2773                 :          3 :         struct wq_flusher this_flusher = {
    2774                 :            :                 .list = LIST_HEAD_INIT(this_flusher.list),
    2775                 :            :                 .flush_color = -1,
    2776                 :            :                 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
    2777                 :            :         };
    2778                 :            :         int next_color;
    2779                 :            : 
    2780                 :          3 :         if (WARN_ON(!wq_online))
    2781                 :          0 :                 return;
    2782                 :            : 
    2783                 :            :         lock_map_acquire(&wq->lockdep_map);
    2784                 :            :         lock_map_release(&wq->lockdep_map);
    2785                 :            : 
    2786                 :          3 :         mutex_lock(&wq->mutex);
    2787                 :            : 
    2788                 :            :         /*
    2789                 :            :          * Start-to-wait phase
    2790                 :            :          */
    2791                 :          3 :         next_color = work_next_color(wq->work_color);
    2792                 :            : 
    2793                 :          3 :         if (next_color != wq->flush_color) {
    2794                 :            :                 /*
    2795                 :            :                  * Color space is not full.  The current work_color
    2796                 :            :                  * becomes our flush_color and work_color is advanced
    2797                 :            :                  * by one.
    2798                 :            :                  */
    2799                 :          3 :                 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
    2800                 :          3 :                 this_flusher.flush_color = wq->work_color;
    2801                 :          3 :                 wq->work_color = next_color;
    2802                 :            : 
    2803                 :          3 :                 if (!wq->first_flusher) {
    2804                 :            :                         /* no flush in progress, become the first flusher */
    2805                 :          3 :                         WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
    2806                 :            : 
    2807                 :          3 :                         wq->first_flusher = &this_flusher;
    2808                 :            : 
    2809                 :          3 :                         if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
    2810                 :            :                                                        wq->work_color)) {
    2811                 :            :                                 /* nothing to flush, done */
    2812                 :          3 :                                 wq->flush_color = next_color;
    2813                 :          3 :                                 wq->first_flusher = NULL;
    2814                 :          3 :                                 goto out_unlock;
    2815                 :            :                         }
    2816                 :            :                 } else {
    2817                 :            :                         /* wait in queue */
    2818                 :          0 :                         WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
    2819                 :          0 :                         list_add_tail(&this_flusher.list, &wq->flusher_queue);
    2820                 :          0 :                         flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
    2821                 :            :                 }
    2822                 :            :         } else {
    2823                 :            :                 /*
    2824                 :            :                  * Oops, color space is full, wait on overflow queue.
    2825                 :            :                  * The next flush completion will assign us
    2826                 :            :                  * flush_color and transfer to flusher_queue.
    2827                 :            :                  */
    2828                 :          0 :                 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
    2829                 :            :         }
    2830                 :            : 
    2831                 :          0 :         check_flush_dependency(wq, NULL);
    2832                 :            : 
    2833                 :          0 :         mutex_unlock(&wq->mutex);
    2834                 :            : 
    2835                 :          0 :         wait_for_completion(&this_flusher.done);
    2836                 :            : 
    2837                 :            :         /*
    2838                 :            :          * Wake-up-and-cascade phase
    2839                 :            :          *
    2840                 :            :          * First flushers are responsible for cascading flushes and
    2841                 :            :          * handling overflow.  Non-first flushers can simply return.
    2842                 :            :          */
    2843                 :          0 :         if (wq->first_flusher != &this_flusher)
    2844                 :            :                 return;
    2845                 :            : 
    2846                 :          0 :         mutex_lock(&wq->mutex);
    2847                 :            : 
    2848                 :            :         /* we might have raced, check again with mutex held */
    2849                 :          0 :         if (wq->first_flusher != &this_flusher)
    2850                 :            :                 goto out_unlock;
    2851                 :            : 
    2852                 :          0 :         wq->first_flusher = NULL;
    2853                 :            : 
    2854                 :          0 :         WARN_ON_ONCE(!list_empty(&this_flusher.list));
    2855                 :          0 :         WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
    2856                 :            : 
    2857                 :            :         while (true) {
    2858                 :            :                 struct wq_flusher *next, *tmp;
    2859                 :            : 
    2860                 :            :                 /* complete all the flushers sharing the current flush color */
    2861                 :          0 :                 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
    2862                 :          0 :                         if (next->flush_color != wq->flush_color)
    2863                 :            :                                 break;
    2864                 :            :                         list_del_init(&next->list);
    2865                 :          0 :                         complete(&next->done);
    2866                 :            :                 }
    2867                 :            : 
    2868                 :          0 :                 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
    2869                 :            :                              wq->flush_color != work_next_color(wq->work_color));
    2870                 :            : 
    2871                 :            :                 /* this flush_color is finished, advance by one */
    2872                 :          0 :                 wq->flush_color = work_next_color(wq->flush_color);
    2873                 :            : 
    2874                 :            :                 /* one color has been freed, handle overflow queue */
    2875                 :          0 :                 if (!list_empty(&wq->flusher_overflow)) {
    2876                 :            :                         /*
    2877                 :            :                          * Assign the same color to all overflowed
    2878                 :            :                          * flushers, advance work_color and append to
    2879                 :            :                          * flusher_queue.  This is the start-to-wait
    2880                 :            :                          * phase for these overflowed flushers.
    2881                 :            :                          */
    2882                 :          0 :                         list_for_each_entry(tmp, &wq->flusher_overflow, list)
    2883                 :          0 :                                 tmp->flush_color = wq->work_color;
    2884                 :            : 
    2885                 :          0 :                         wq->work_color = work_next_color(wq->work_color);
    2886                 :            : 
    2887                 :          0 :                         list_splice_tail_init(&wq->flusher_overflow,
    2888                 :            :                                               &wq->flusher_queue);
    2889                 :          0 :                         flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
    2890                 :            :                 }
    2891                 :            : 
    2892                 :          0 :                 if (list_empty(&wq->flusher_queue)) {
    2893                 :          0 :                         WARN_ON_ONCE(wq->flush_color != wq->work_color);
    2894                 :            :                         break;
    2895                 :            :                 }
    2896                 :            : 
    2897                 :            :                 /*
    2898                 :            :                  * Need to flush more colors.  Make the next flusher
    2899                 :            :                  * the new first flusher and arm pwqs.
    2900                 :            :                  */
    2901                 :          0 :                 WARN_ON_ONCE(wq->flush_color == wq->work_color);
    2902                 :          0 :                 WARN_ON_ONCE(wq->flush_color != next->flush_color);
    2903                 :            : 
    2904                 :          0 :                 list_del_init(&next->list);
    2905                 :          0 :                 wq->first_flusher = next;
    2906                 :            : 
    2907                 :          0 :                 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
    2908                 :            :                         break;
    2909                 :            : 
    2910                 :            :                 /*
    2911                 :            :                  * Meh... this color is already done, clear first
    2912                 :            :                  * flusher and repeat cascading.
    2913                 :            :                  */
    2914                 :          0 :                 wq->first_flusher = NULL;
    2915                 :          0 :         }
    2916                 :            : 
    2917                 :            : out_unlock:
    2918                 :          3 :         mutex_unlock(&wq->mutex);
    2919                 :            : }
    2920                 :            : EXPORT_SYMBOL(flush_workqueue);
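
/*
 * Editorial sketch, not part of the original file: typical
 * flush_workqueue() usage.  A caller flushes its own workqueue to make
 * sure every work item queued so far has finished before it touches or
 * frees state those items use.  The example_* identifiers are
 * hypothetical.
 */
#include <linux/workqueue.h>

static void example_quiesce(struct workqueue_struct *example_wq)
{
	/* returns only after all previously queued items have executed */
	flush_workqueue(example_wq);
}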
    2921                 :            : 
    2922                 :            : /**
    2923                 :            :  * drain_workqueue - drain a workqueue
    2924                 :            :  * @wq: workqueue to drain
    2925                 :            :  *
    2926                 :            :  * Wait until the workqueue becomes empty.  While draining is in progress,
    2927                 :            :  * only chain queueing is allowed.  IOW, only currently pending or running
    2928                 :            :  * work items on @wq can queue further work items on it.  @wq is flushed
    2929                 :            :  * repeatedly until it becomes empty.  The number of flushing is determined
     2930                 :            :  * repeatedly until it becomes empty.  The number of flushes is determined
     2931                 :            :  * by the depth of chaining and should be relatively small.  Whine if it
    2932                 :            :  */
    2933                 :          2 : void drain_workqueue(struct workqueue_struct *wq)
    2934                 :            : {
    2935                 :            :         unsigned int flush_cnt = 0;
    2936                 :            :         struct pool_workqueue *pwq;
    2937                 :            : 
    2938                 :            :         /*
     2939                 :            :          * __queue_work() needs to test whether there are drainers; it is much
     2940                 :            :          * hotter than drain_workqueue() and already looks at @wq->flags.
     2941                 :            :          * Use __WQ_DRAINING so that the queueing path needn't check nr_drainers.
    2942                 :            :          */
    2943                 :          2 :         mutex_lock(&wq->mutex);
    2944                 :          2 :         if (!wq->nr_drainers++)
    2945                 :          2 :                 wq->flags |= __WQ_DRAINING;
    2946                 :          2 :         mutex_unlock(&wq->mutex);
    2947                 :            : reflush:
    2948                 :          2 :         flush_workqueue(wq);
    2949                 :            : 
    2950                 :          2 :         mutex_lock(&wq->mutex);
    2951                 :            : 
    2952                 :          2 :         for_each_pwq(pwq, wq) {
    2953                 :            :                 bool drained;
    2954                 :            : 
    2955                 :          2 :                 spin_lock_irq(&pwq->pool->lock);
    2956                 :          2 :                 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
    2957                 :          2 :                 spin_unlock_irq(&pwq->pool->lock);
    2958                 :            : 
    2959                 :          2 :                 if (drained)
    2960                 :          2 :                         continue;
    2961                 :            : 
    2962                 :          0 :                 if (++flush_cnt == 10 ||
    2963                 :          0 :                     (flush_cnt % 100 == 0 && flush_cnt <= 1000))
    2964                 :          0 :                         pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
    2965                 :            :                                 wq->name, flush_cnt);
    2966                 :            : 
    2967                 :          0 :                 mutex_unlock(&wq->mutex);
    2968                 :          0 :                 goto reflush;
    2969                 :            :         }
    2970                 :            : 
    2971                 :          2 :         if (!--wq->nr_drainers)
    2972                 :          2 :                 wq->flags &= ~__WQ_DRAINING;
    2973                 :          2 :         mutex_unlock(&wq->mutex);
    2974                 :          2 : }
    2975                 :            : EXPORT_SYMBOL_GPL(drain_workqueue);
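
/*
 * Editorial sketch, not part of the original file: drain_workqueue() is the
 * teardown-strength variant (destroy_workqueue() drains before freeing).
 * Unlike a single flush it loops until @wq is truly empty, so
 * self-requeueing work items must eventually stop requeueing for it to
 * terminate.  The example_* identifiers are hypothetical.
 */
#include <linux/workqueue.h>

static void example_teardown(struct workqueue_struct *example_wq)
{
	/* only chain queueing from @example_wq's own items is allowed here */
	drain_workqueue(example_wq);
	/* destroy_workqueue() also drains, so this ordering is belt-and-braces */
	destroy_workqueue(example_wq);
}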
    2976                 :            : 
    2977                 :          3 : static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
    2978                 :            :                              bool from_cancel)
    2979                 :            : {
    2980                 :            :         struct worker *worker = NULL;
    2981                 :            :         struct worker_pool *pool;
    2982                 :            :         struct pool_workqueue *pwq;
    2983                 :            : 
    2984                 :          3 :         might_sleep();
    2985                 :            : 
    2986                 :            :         rcu_read_lock();
    2987                 :          3 :         pool = get_work_pool(work);
    2988                 :          3 :         if (!pool) {
    2989                 :            :                 rcu_read_unlock();
    2990                 :          3 :                 return false;
    2991                 :            :         }
    2992                 :            : 
    2993                 :            :         spin_lock_irq(&pool->lock);
    2994                 :            :         /* see the comment in try_to_grab_pending() with the same code */
    2995                 :            :         pwq = get_work_pwq(work);
    2996                 :          3 :         if (pwq) {
    2997                 :          3 :                 if (unlikely(pwq->pool != pool))
    2998                 :            :                         goto already_gone;
    2999                 :            :         } else {
    3000                 :          3 :                 worker = find_worker_executing_work(pool, work);
    3001                 :          3 :                 if (!worker)
    3002                 :            :                         goto already_gone;
    3003                 :          3 :                 pwq = worker->current_pwq;
    3004                 :            :         }
    3005                 :            : 
    3006                 :          3 :         check_flush_dependency(pwq->wq, work);
    3007                 :            : 
    3008                 :          3 :         insert_wq_barrier(pwq, barr, work, worker);
    3009                 :            :         spin_unlock_irq(&pool->lock);
    3010                 :            : 
    3011                 :            :         /*
     3012                 :            :          * Force a lock recursion deadlock report when using flush_work()
     3013                 :            :          * inside a single-threaded or rescuer-equipped workqueue.
     3014                 :            :          *
     3015                 :            :          * For single-threaded workqueues the deadlock happens when the
     3016                 :            :          * flushed work is queued after the work issuing the flush_work().
     3017                 :            :          * For rescuer-equipped workqueues the deadlock happens when the
     3018                 :            :          * rescuer stalls, blocking forward progress.
    3019                 :            :          */
    3020                 :            :         if (!from_cancel &&
    3021                 :            :             (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
    3022                 :            :                 lock_map_acquire(&pwq->wq->lockdep_map);
    3023                 :            :                 lock_map_release(&pwq->wq->lockdep_map);
    3024                 :            :         }
    3025                 :            :         rcu_read_unlock();
    3026                 :          3 :         return true;
    3027                 :            : already_gone:
    3028                 :            :         spin_unlock_irq(&pool->lock);
    3029                 :            :         rcu_read_unlock();
    3030                 :          3 :         return false;
    3031                 :            : }
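
A minimal sketch of the deadlock that the lockdep annotation above is meant to catch, using a hypothetical ordered workqueue and two hypothetical work items (none of this is part of workqueue.c):

#include <linux/workqueue.h>

static struct workqueue_struct *my_ordered_wq;
static struct work_struct work_a, work_b;

static void work_b_fn(struct work_struct *work) { }

static void work_a_fn(struct work_struct *work)
{
        /*
         * work_b was queued behind work_a on the same single-threaded
         * queue, so it cannot start until work_a returns: this wait
         * never completes.  With lockdep enabled, the acquire/release
         * pair in start_flush_work() reports it as lock recursion
         * instead of hanging silently.
         */
        flush_work(&work_b);
}

static void my_setup(void)
{
        my_ordered_wq = alloc_ordered_workqueue("my_ordered", 0);
        INIT_WORK(&work_a, work_a_fn);
        INIT_WORK(&work_b, work_b_fn);
        queue_work(my_ordered_wq, &work_a);
        queue_work(my_ordered_wq, &work_b);     /* stuck behind work_a */
}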
    3032                 :            : 
    3033                 :          3 : static bool __flush_work(struct work_struct *work, bool from_cancel)
    3034                 :            : {
    3035                 :            :         struct wq_barrier barr;
    3036                 :            : 
    3037                 :          3 :         if (WARN_ON(!wq_online))
    3038                 :            :                 return false;
    3039                 :            : 
    3040                 :          3 :         if (WARN_ON(!work->func))
    3041                 :            :                 return false;
    3042                 :            : 
    3043                 :            :         if (!from_cancel) {
    3044                 :            :                 lock_map_acquire(&work->lockdep_map);
    3045                 :            :                 lock_map_release(&work->lockdep_map);
    3046                 :            :         }
    3047                 :            : 
    3048                 :          3 :         if (start_flush_work(work, &barr, from_cancel)) {
    3049                 :          3 :                 wait_for_completion(&barr.done);
    3050                 :            :                 destroy_work_on_stack(&barr.work);
    3051                 :          3 :                 return true;
    3052                 :            :         } else {
    3053                 :            :                 return false;
    3054                 :            :         }
    3055                 :            : }
    3056                 :            : 
    3057                 :            : /**
    3058                 :            :  * flush_work - wait for a work to finish executing the last queueing instance
    3059                 :            :  * @work: the work to flush
    3060                 :            :  *
    3061                 :            :  * Wait until @work has finished execution.  @work is guaranteed to be idle
    3062                 :            :  * on return if it hasn't been requeued since flush started.
    3063                 :            :  *
    3064                 :            :  * Return:
    3065                 :            :  * %true if flush_work() waited for the work to finish execution,
    3066                 :            :  * %false if it was already idle.
    3067                 :            :  */
    3068                 :          3 : bool flush_work(struct work_struct *work)
    3069                 :            : {
    3070                 :          3 :         return __flush_work(work, false);
    3071                 :            : }
    3072                 :            : EXPORT_SYMBOL_GPL(flush_work);
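
A minimal usage sketch, assuming a hypothetical driver work item my_work (not part of this file):

#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
        /* touches driver state that teardown is about to release */
}
static DECLARE_WORK(my_work, my_work_fn);

static void my_teardown(void)
{
        /*
         * Returns %true if it had to wait, %false if my_work was
         * already idle; either way my_work is idle on return unless
         * something requeued it after the flush started.
         */
        flush_work(&my_work);
}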
    3073                 :            : 
    3074                 :            : struct cwt_wait {
    3075                 :            :         wait_queue_entry_t              wait;
    3076                 :            :         struct work_struct      *work;
    3077                 :            : };
    3078                 :            : 
    3079                 :          0 : static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
    3080                 :            : {
    3081                 :            :         struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
    3082                 :            : 
    3083                 :          0 :         if (cwait->work != key)
    3084                 :            :                 return 0;
    3085                 :          0 :         return autoremove_wake_function(wait, mode, sync, key);
    3086                 :            : }
    3087                 :            : 
    3088                 :          3 : static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
    3089                 :            : {
    3090                 :            :         static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
    3091                 :            :         unsigned long flags;
    3092                 :            :         int ret;
    3093                 :            : 
    3094                 :            :         do {
    3095                 :          3 :                 ret = try_to_grab_pending(work, is_dwork, &flags);
    3096                 :            :                 /*
    3097                 :            :                  * If someone else is already canceling, wait for it to
    3098                 :            :                  * finish.  flush_work() doesn't work for PREEMPT_NONE
    3099                 :            :                  * because we may get scheduled between @work's completion
    3100                 :            :                  * and the other canceling task resuming and clearing
    3101                 :            :                  * CANCELING - flush_work() will return false immediately
    3102                 :            :                  * as @work is no longer busy, try_to_grab_pending() will
    3103                 :            :                  * return -ENOENT as @work is still being canceled and the
    3104                 :            :                  * other canceling task won't be able to clear CANCELING as
    3105                 :            :                  * we're hogging the CPU.
    3106                 :            :                  *
    3107                 :            :                  * Let's wait for completion using a waitqueue.  As this
    3108                 :            :                  * may lead to the thundering herd problem, use a custom
    3109                 :            :                  * wake function which matches @work along with exclusive
    3110                 :            :                  * wait and wakeup.
    3111                 :            :                  */
    3112                 :          3 :                 if (unlikely(ret == -ENOENT)) {
    3113                 :            :                         struct cwt_wait cwait;
    3114                 :            : 
    3115                 :          0 :                         init_wait(&cwait.wait);
    3116                 :          0 :                         cwait.wait.func = cwt_wakefn;
    3117                 :          0 :                         cwait.work = work;
    3118                 :            : 
    3119                 :          0 :                         prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
    3120                 :            :                                                   TASK_UNINTERRUPTIBLE);
    3121                 :          0 :                         if (work_is_canceling(work))
    3122                 :          0 :                                 schedule();
    3123                 :          0 :                         finish_wait(&cancel_waitq, &cwait.wait);
    3124                 :            :                 }
    3125                 :          3 :         } while (unlikely(ret < 0));
    3126                 :            : 
    3127                 :            :         /* tell other tasks trying to grab @work to back off */
    3128                 :          3 :         mark_work_canceling(work);
    3129                 :          3 :         local_irq_restore(flags);
    3130                 :            : 
    3131                 :            :         /*
    3132                 :            :          * This allows canceling during early boot.  We know that @work
    3133                 :            :          * isn't executing.
    3134                 :            :          */
    3135                 :          3 :         if (wq_online)
    3136                 :          3 :                 __flush_work(work, true);
    3137                 :            : 
    3138                 :            :         clear_work_data(work);
    3139                 :            : 
    3140                 :            :         /*
    3141                 :            :          * Paired with prepare_to_wait() above so that either
    3142                 :            :          * waitqueue_active() is visible here or !work_is_canceling() is
    3143                 :            :          * visible there.
    3144                 :            :          */
    3145                 :          3 :         smp_mb();
    3146                 :          3 :         if (waitqueue_active(&cancel_waitq))
    3147                 :          0 :                 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
    3148                 :            : 
    3149                 :          3 :         return ret;
    3150                 :            : }
    3151                 :            : 
    3152                 :            : /**
    3153                 :            :  * cancel_work_sync - cancel a work and wait for it to finish
    3154                 :            :  * @work: the work to cancel
    3155                 :            :  *
    3156                 :            :  * Cancel @work and wait for its execution to finish.  This function
    3157                 :            :  * can be used even if the work re-queues itself or migrates to
    3158                 :            :  * another workqueue.  On return from this function, @work is
    3159                 :            :  * guaranteed to be not pending or executing on any CPU.
    3160                 :            :  *
    3161                 :            :  * cancel_work_sync(&delayed_work->work) must not be used for delayed
    3162                 :            :  * work items.  Use cancel_delayed_work_sync() instead.
    3163                 :            :  *
    3164                 :            :  * The caller must ensure that the workqueue on which @work was last
    3165                 :            :  * queued can't be destroyed before this function returns.
    3166                 :            :  *
    3167                 :            :  * Return:
    3168                 :            :  * %true if @work was pending, %false otherwise.
    3169                 :            :  */
    3170                 :          3 : bool cancel_work_sync(struct work_struct *work)
    3171                 :            : {
    3172                 :          3 :         return __cancel_work_timer(work, false);
    3173                 :            : }
    3174                 :            : EXPORT_SYMBOL_GPL(cancel_work_sync);
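
A sketch of the typical caller, with hypothetical names: cancelling a possibly pending, possibly self-requeueing work item on device removal.

#include <linux/workqueue.h>

static void my_poll_fn(struct work_struct *work)
{
        /* poll the hardware; may schedule_work() itself again */
}
static DECLARE_WORK(my_poll_work, my_poll_fn);

static void my_remove(void)
{
        /*
         * On return my_poll_work is neither pending nor executing on
         * any CPU, even if the handler had requeued itself.  The
         * workqueue it was last queued on must still exist here.
         */
        cancel_work_sync(&my_poll_work);
}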
    3175                 :            : 
    3176                 :            : /**
    3177                 :            :  * flush_delayed_work - wait for a dwork to finish executing the last queueing
    3178                 :            :  * @dwork: the delayed work to flush
    3179                 :            :  *
    3180                 :            :  * Delayed timer is cancelled and the pending work is queued for
    3181                 :            :  * immediate execution.  Like flush_work(), this function only
    3182                 :            :  * considers the last queueing instance of @dwork.
    3183                 :            :  *
    3184                 :            :  * Return:
    3185                 :            :  * %true if flush_work() waited for the work to finish execution,
    3186                 :            :  * %false if it was already idle.
    3187                 :            :  */
    3188                 :          3 : bool flush_delayed_work(struct delayed_work *dwork)
    3189                 :            : {
    3190                 :          3 :         local_irq_disable();
    3191                 :          3 :         if (del_timer_sync(&dwork->timer))
    3192                 :          3 :                 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
    3193                 :          3 :         local_irq_enable();
    3194                 :          3 :         return flush_work(&dwork->work);
    3195                 :            : }
    3196                 :            : EXPORT_SYMBOL(flush_delayed_work);
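
A usage sketch with a hypothetical deferred writeback item: the pending timer is pushed out and the handler is waited for, e.g. before suspend.

#include <linux/workqueue.h>

static void my_writeback_fn(struct work_struct *work)
{
        /* write cached state back to the device */
}
static DECLARE_DELAYED_WORK(my_writeback, my_writeback_fn);

static int my_suspend(void)
{
        /* cancel the timer, queue the work immediately and wait for it */
        flush_delayed_work(&my_writeback);
        return 0;
}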
    3197                 :            : 
    3198                 :            : /**
    3199                 :            :  * flush_rcu_work - wait for a rwork to finish executing the last queueing
    3200                 :            :  * @rwork: the rcu work to flush
    3201                 :            :  *
    3202                 :            :  * Return:
    3203                 :            :  * %true if flush_rcu_work() waited for the work to finish execution,
    3204                 :            :  * %false if it was already idle.
    3205                 :            :  */
    3206                 :          0 : bool flush_rcu_work(struct rcu_work *rwork)
    3207                 :            : {
    3208                 :          0 :         if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
    3209                 :          0 :                 rcu_barrier();
    3210                 :          0 :                 flush_work(&rwork->work);
    3211                 :          0 :                 return true;
    3212                 :            :         } else {
    3213                 :          0 :                 return flush_work(&rwork->work);
    3214                 :            :         }
    3215                 :            : }
    3216                 :            : EXPORT_SYMBOL(flush_rcu_work);
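
A short sketch, assuming a hypothetical RCU-deferred reclaim item: because the work is queued with queue_rcu_work(), the flush first waits for the grace period (rcu_barrier()) and only then for the handler itself.

#include <linux/workqueue.h>

static void my_reclaim_fn(struct work_struct *work)
{
        /* free structures whose RCU readers are now guaranteed gone */
}
static struct rcu_work my_reclaim;

static void my_retire_object(void)
{
        INIT_RCU_WORK(&my_reclaim, my_reclaim_fn);
        queue_rcu_work(system_wq, &my_reclaim);
}

static void my_exit(void)
{
        /* waits for the grace period, then for my_reclaim_fn() to finish */
        flush_rcu_work(&my_reclaim);
}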
    3217                 :            : 
    3218                 :          3 : static bool __cancel_work(struct work_struct *work, bool is_dwork)
    3219                 :            : {
    3220                 :            :         unsigned long flags;
    3221                 :            :         int ret;
    3222                 :            : 
    3223                 :            :         do {
    3224                 :          3 :                 ret = try_to_grab_pending(work, is_dwork, &flags);
    3225                 :          3 :         } while (unlikely(ret == -EAGAIN));
    3226                 :            : 
    3227                 :          3 :         if (unlikely(ret < 0))
    3228                 :            :                 return false;
    3229                 :            : 
    3230                 :            :         set_work_pool_and_clear_pending(work, get_work_pool_id(work));
    3231                 :          3 :         local_irq_restore(flags);
    3232                 :          3 :         return ret;
    3233                 :            : }
    3234                 :            : 
    3235                 :            : /**
    3236                 :            :  * cancel_delayed_work - cancel a delayed work
    3237                 :            :  * @dwork: delayed_work to cancel
    3238                 :            :  *
    3239                 :            :  * Kill off a pending delayed_work.
    3240                 :            :  *
    3241                 :            :  * Return: %true if @dwork was pending and canceled; %false if it wasn't
    3242                 :            :  * pending.
    3243                 :            :  *
    3244                 :            :  * Note:
    3245                 :            :  * The work callback function may still be running on return, unless
    3246                 :            :  * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
    3247                 :            :  * use cancel_delayed_work_sync() to wait on it.
    3248                 :            :  *
    3249                 :            :  * This function is safe to call from any context including IRQ handler.
    3250                 :            :  */
    3251                 :          3 : bool cancel_delayed_work(struct delayed_work *dwork)
    3252                 :            : {
    3253                 :          3 :         return __cancel_work(&dwork->work, true);
    3254                 :            : }
    3255                 :            : EXPORT_SYMBOL(cancel_delayed_work);
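
A sketch of the asynchronous variant with hypothetical names: safe from hard IRQ context, but the callback may still be running on return, so nothing it uses may be freed here.

#include <linux/interrupt.h>
#include <linux/workqueue.h>

static void my_timeout_fn(struct work_struct *work)
{
        /* handle the timeout */
}
static DECLARE_DELAYED_WORK(my_timeout, my_timeout_fn);

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
        /* the expected event arrived in time; drop the pending timeout */
        cancel_delayed_work(&my_timeout);
        return IRQ_HANDLED;
}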
    3256                 :            : 
    3257                 :            : /**
    3258                 :            :  * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
    3259                 :            :  * @dwork: the delayed work to cancel
    3260                 :            :  *
    3261                 :            :  * This is cancel_work_sync() for delayed works.
    3262                 :            :  *
    3263                 :            :  * Return:
    3264                 :            :  * %true if @dwork was pending, %false otherwise.
    3265                 :            :  */
    3266                 :          0 : bool cancel_delayed_work_sync(struct delayed_work *dwork)
    3267                 :            : {
    3268                 :          0 :         return __cancel_work_timer(&dwork->work, true);
    3269                 :            : }
    3270                 :            : EXPORT_SYMBOL(cancel_delayed_work_sync);
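
The synchronous counterpart, sketched with the same kind of hypothetical delayed work: it may sleep, and on return both the timer and the callback are finished.

#include <linux/workqueue.h>

static void my_timeout_fn(struct work_struct *work)
{
        /* handle the timeout */
}
static DECLARE_DELAYED_WORK(my_timeout, my_timeout_fn);

static void my_remove(void)
{
        /* process context only; waits for a running callback to finish */
        cancel_delayed_work_sync(&my_timeout);
}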
    3271                 :            : 
    3272                 :            : /**
    3273                 :            :  * schedule_on_each_cpu - execute a function synchronously on each online CPU
    3274                 :            :  * @func: the function to call
    3275                 :            :  *
    3276                 :            :  * schedule_on_each_cpu() executes @func on each online CPU using the
    3277                 :            :  * system workqueue and blocks until all CPUs have completed.
    3278                 :            :  * schedule_on_each_cpu() is very slow.
    3279                 :            :  *
    3280                 :            :  * Return:
    3281                 :            :  * 0 on success, -errno on failure.
    3282                 :            :  */
    3283                 :          0 : int schedule_on_each_cpu(work_func_t func)
    3284                 :            : {
    3285                 :            :         int cpu;
    3286                 :            :         struct work_struct __percpu *works;
    3287                 :            : 
    3288                 :          0 :         works = alloc_percpu(struct work_struct);
    3289                 :          0 :         if (!works)
    3290                 :            :                 return -ENOMEM;
    3291                 :            : 
    3292                 :            :         get_online_cpus();
    3293                 :            : 
    3294                 :          0 :         for_each_online_cpu(cpu) {
    3295                 :          0 :                 struct work_struct *work = per_cpu_ptr(works, cpu);
    3296                 :            : 
    3297                 :          0 :                 INIT_WORK(work, func);
    3298                 :            :                 schedule_work_on(cpu, work);
    3299                 :            :         }
    3300                 :            : 
    3301                 :          0 :         for_each_online_cpu(cpu)
    3302                 :          0 :                 flush_work(per_cpu_ptr(works, cpu));
    3303                 :            : 
    3304                 :            :         put_online_cpus();
    3305                 :          0 :         free_percpu(works);
    3306                 :          0 :         return 0;
    3307                 :            : }
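
A usage sketch, assuming a hypothetical per-CPU counter: run a short function on every online CPU through the system workqueue and block until all of them have completed.

#include <linux/percpu.h>
#include <linux/workqueue.h>

static DEFINE_PER_CPU(unsigned long, my_counter);

static void my_bump(struct work_struct *work)
{
        this_cpu_inc(my_counter);
}

static int my_bump_all_cpus(void)
{
        /* returns 0 on success or -ENOMEM if the per-cpu works failed */
        return schedule_on_each_cpu(my_bump);
}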
    3308                 :            : 
    3309                 :            : /**
    3310                 :            :  * execute_in_process_context - reliably execute the routine with user context
    3311                 :            :  * @fn:         the function to execute
    3312                 :            :  * @ew:         guaranteed storage for the execute work structure (must
    3313                 :            :  *              be available when the work executes)
    3314                 :            :  *
    3315                 :            :  * Executes the function immediately if process context is available,
    3316                 :            :  * otherwise schedules the function for delayed execution.
    3317                 :            :  *
    3318                 :            :  * Return:      0 - function was executed
    3319                 :            :  *              1 - function was scheduled for execution
    3320                 :            :  */
    3321                 :          0 : int execute_in_process_context(work_func_t fn, struct execute_work *ew)
    3322                 :            : {
    3323                 :          0 :         if (!in_interrupt()) {
    3324                 :          0 :                 fn(&ew->work);
    3325                 :          0 :                 return 0;
    3326                 :            :         }
    3327                 :            : 
    3328                 :          0 :         INIT_WORK(&ew->work, fn);
    3329                 :          0 :         schedule_work(&ew->work);
    3330                 :            : 
    3331                 :          0 :         return 1;
    3332                 :            : }
    3333                 :            : EXPORT_SYMBOL_GPL(execute_in_process_context);
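
A sketch with a hypothetical cleanup helper: the function runs inline when the caller is already in process context, otherwise it is deferred through @ew, which therefore must stay valid until the work has executed.

#include <linux/workqueue.h>

static struct execute_work my_ew;       /* must outlive the deferred call */

static void my_cleanup(struct work_struct *work)
{
        /* may sleep: runs either inline or from a kworker */
}

static void my_release(void)
{
        /* returns 0 if my_cleanup() ran inline, 1 if it was scheduled */
        execute_in_process_context(my_cleanup, &my_ew);
}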
    3334                 :            : 
    3335                 :            : /**
    3336                 :            :  * free_workqueue_attrs - free a workqueue_attrs
    3337                 :            :  * @attrs: workqueue_attrs to free
    3338                 :            :  *
    3339                 :            :  * Undo alloc_workqueue_attrs().
    3340                 :            :  */
    3341                 :          0 : void free_workqueue_attrs(struct workqueue_attrs *attrs)
    3342                 :            : {
    3343                 :          3 :         if (attrs) {
    3344                 :            :                 free_cpumask_var(attrs->cpumask);
    3345                 :          3 :                 kfree(attrs);
    3346                 :            :         }
    3347                 :          0 : }
    3348                 :            : 
    3349                 :            : /**
    3350                 :            :  * alloc_workqueue_attrs - allocate a workqueue_attrs
    3351                 :            :  *
    3352                 :            :  * Allocate a new workqueue_attrs, initialize with default settings and
    3353                 :            :  * return it.
    3354                 :            :  *
    3355                 :            :  * Return: The allocated new workqueue_attrs on success. %NULL on failure.
    3356                 :            :  */
    3357                 :          3 : struct workqueue_attrs *alloc_workqueue_attrs(void)
    3358                 :            : {
    3359                 :            :         struct workqueue_attrs *attrs;
    3360                 :            : 
    3361                 :          3 :         attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
    3362                 :          3 :         if (!attrs)
    3363                 :            :                 goto fail;
    3364                 :            :         if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
    3365                 :            :                 goto fail;
    3366                 :            : 
    3367                 :            :         cpumask_copy(attrs->cpumask, cpu_possible_mask);
    3368                 :          3 :         return attrs;
    3369                 :            : fail:
    3370                 :            :         free_workqueue_attrs(attrs);
    3371                 :            :         return NULL;
    3372                 :            : }
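
A sketch of the allocate/apply/free cycle, assuming a hypothetical unbound workqueue passed in by the caller: apply_workqueue_attrs() copies the attributes, so they can be freed right after the call.

#include <linux/cpumask.h>
#include <linux/workqueue.h>

static int my_tune_wq(struct workqueue_struct *my_unbound_wq)
{
        struct workqueue_attrs *attrs;
        int ret;

        attrs = alloc_workqueue_attrs();
        if (!attrs)
                return -ENOMEM;

        attrs->nice = -10;                            /* higher-priority workers */
        cpumask_copy(attrs->cpumask, cpumask_of(0));  /* restrict to CPU 0 */

        ret = apply_workqueue_attrs(my_unbound_wq, attrs);
        free_workqueue_attrs(attrs);
        return ret;
}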
    3373                 :            : 
    3374                 :            : static void copy_workqueue_attrs(struct workqueue_attrs *to,
    3375                 :            :                                  const struct workqueue_attrs *from)
    3376                 :            : {
    3377                 :          3 :         to->nice = from->nice;
    3378                 :            :         cpumask_copy(to->cpumask, from->cpumask);
    3379                 :            :         /*
    3380                 :            :          * Unlike hash and equality test, this function doesn't ignore
    3381                 :            :          * ->no_numa as it is used for both pool and wq attrs.  Instead,
    3382                 :            :          * get_unbound_pool() explicitly clears ->no_numa after copying.
    3383                 :            :          */
    3384                 :          3 :         to->no_numa = from->no_numa;
    3385                 :            : }
    3386                 :            : 
    3387                 :            : /* hash value of the content of @attrs */
    3388                 :          3 : static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
    3389                 :            : {
    3390                 :            :         u32 hash = 0;
    3391                 :            : 
    3392                 :          3 :         hash = jhash_1word(attrs->nice, hash);
    3393                 :          3 :         hash = jhash(cpumask_bits(attrs->cpumask),
    3394                 :            :                      BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
    3395                 :          3 :         return hash;
    3396                 :            : }
    3397                 :            : 
    3398                 :            : /* content equality test */
    3399                 :          3 : static bool wqattrs_equal(const struct workqueue_attrs *a,
    3400                 :            :                           const struct workqueue_attrs *b)
    3401                 :            : {
    3402                 :          3 :         if (a->nice != b->nice)
    3403                 :            :                 return false;
    3404                 :          3 :         if (!cpumask_equal(a->cpumask, b->cpumask))
    3405                 :            :                 return false;
    3406                 :          3 :         return true;
    3407                 :            : }
    3408                 :            : 
    3409                 :            : /**
    3410                 :            :  * init_worker_pool - initialize a newly zalloc'd worker_pool
    3411                 :            :  * @pool: worker_pool to initialize
    3412                 :            :  *
    3413                 :            :  * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
    3414                 :            :  *
    3415                 :            :  * Return: 0 on success, -errno on failure.  Even on failure, all fields
    3416                 :            :  * inside @pool proper are initialized and put_unbound_pool() can be called
    3417                 :            :  * on @pool safely to release it.
    3418                 :            :  */
    3419                 :          3 : static int init_worker_pool(struct worker_pool *pool)
    3420                 :            : {
    3421                 :          3 :         spin_lock_init(&pool->lock);
    3422                 :          3 :         pool->id = -1;
    3423                 :          3 :         pool->cpu = -1;
    3424                 :          3 :         pool->node = NUMA_NO_NODE;
    3425                 :          3 :         pool->flags |= POOL_DISASSOCIATED;
    3426                 :          3 :         pool->watchdog_ts = jiffies;
    3427                 :          3 :         INIT_LIST_HEAD(&pool->worklist);
    3428                 :          3 :         INIT_LIST_HEAD(&pool->idle_list);
    3429                 :          3 :         hash_init(pool->busy_hash);
    3430                 :            : 
    3431                 :          3 :         timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
    3432                 :            : 
    3433                 :          3 :         timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
    3434                 :            : 
    3435                 :          3 :         INIT_LIST_HEAD(&pool->workers);
    3436                 :            : 
    3437                 :            :         ida_init(&pool->worker_ida);
    3438                 :            :         INIT_HLIST_NODE(&pool->hash_node);
    3439                 :          3 :         pool->refcnt = 1;
    3440                 :            : 
    3441                 :            :         /* shouldn't fail above this point */
    3442                 :          3 :         pool->attrs = alloc_workqueue_attrs();
    3443                 :          3 :         if (!pool->attrs)
    3444                 :            :                 return -ENOMEM;
    3445                 :          3 :         return 0;
    3446                 :            : }
    3447                 :            : 
    3448                 :            : #ifdef CONFIG_LOCKDEP
    3449                 :            : static void wq_init_lockdep(struct workqueue_struct *wq)
    3450                 :            : {
    3451                 :            :         char *lock_name;
    3452                 :            : 
    3453                 :            :         lockdep_register_key(&wq->key);
    3454                 :            :         lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
    3455                 :            :         if (!lock_name)
    3456                 :            :                 lock_name = wq->name;
    3457                 :            : 
    3458                 :            :         wq->lock_name = lock_name;
    3459                 :            :         lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
    3460                 :            : }
    3461                 :            : 
    3462                 :            : static void wq_unregister_lockdep(struct workqueue_struct *wq)
    3463                 :            : {
    3464                 :            :         lockdep_unregister_key(&wq->key);
    3465                 :            : }
    3466                 :            : 
    3467                 :            : static void wq_free_lockdep(struct workqueue_struct *wq)
    3468                 :            : {
    3469                 :            :         if (wq->lock_name != wq->name)
    3470                 :            :                 kfree(wq->lock_name);
    3471                 :            : }
    3472                 :            : #else
    3473                 :            : static void wq_init_lockdep(struct workqueue_struct *wq)
    3474                 :            : {
    3475                 :            : }
    3476                 :            : 
    3477                 :            : static void wq_unregister_lockdep(struct workqueue_struct *wq)
    3478                 :            : {
    3479                 :            : }
    3480                 :            : 
    3481                 :            : static void wq_free_lockdep(struct workqueue_struct *wq)
    3482                 :            : {
    3483                 :            : }
    3484                 :            : #endif
    3485                 :            : 
    3486                 :          2 : static void rcu_free_wq(struct rcu_head *rcu)
    3487                 :            : {
    3488                 :            :         struct workqueue_struct *wq =
    3489                 :          2 :                 container_of(rcu, struct workqueue_struct, rcu);
    3490                 :            : 
    3491                 :            :         wq_free_lockdep(wq);
    3492                 :            : 
    3493                 :          2 :         if (!(wq->flags & WQ_UNBOUND))
    3494                 :          0 :                 free_percpu(wq->cpu_pwqs);
    3495                 :            :         else
    3496                 :          2 :                 free_workqueue_attrs(wq->unbound_attrs);
    3497                 :            : 
    3498                 :          2 :         kfree(wq->rescuer);
    3499                 :          2 :         kfree(wq);
    3500                 :          2 : }
    3501                 :            : 
    3502                 :          0 : static void rcu_free_pool(struct rcu_head *rcu)
    3503                 :            : {
    3504                 :          0 :         struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
    3505                 :            : 
    3506                 :          0 :         ida_destroy(&pool->worker_ida);
    3507                 :          0 :         free_workqueue_attrs(pool->attrs);
    3508                 :          0 :         kfree(pool);
    3509                 :          0 : }
    3510                 :            : 
    3511                 :            : /**
    3512                 :            :  * put_unbound_pool - put a worker_pool
    3513                 :            :  * @pool: worker_pool to put
    3514                 :            :  *
    3515                 :            :  * Put @pool.  If its refcnt reaches zero, it gets destroyed in an RCU-safe
    3516                 :            :  * manner.  get_unbound_pool() calls this function on its failure path
    3517                 :            :  * and this function should be able to release pools which went through,
    3518                 :            :  * successfully or not, init_worker_pool().
    3519                 :            :  *
    3520                 :            :  * Should be called with wq_pool_mutex held.
    3521                 :            :  */
    3522                 :          2 : static void put_unbound_pool(struct worker_pool *pool)
    3523                 :            : {
    3524                 :          2 :         DECLARE_COMPLETION_ONSTACK(detach_completion);
    3525                 :            :         struct worker *worker;
    3526                 :            : 
    3527                 :            :         lockdep_assert_held(&wq_pool_mutex);
    3528                 :            : 
    3529                 :          2 :         if (--pool->refcnt)
    3530                 :          2 :                 return;
    3531                 :            : 
    3532                 :            :         /* sanity checks */
    3533                 :          0 :         if (WARN_ON(!(pool->cpu < 0)) ||
    3534                 :          0 :             WARN_ON(!list_empty(&pool->worklist)))
    3535                 :            :                 return;
    3536                 :            : 
    3537                 :            :         /* release id and unhash */
    3538                 :          0 :         if (pool->id >= 0)
    3539                 :          0 :                 idr_remove(&worker_pool_idr, pool->id);
    3540                 :            :         hash_del(&pool->hash_node);
    3541                 :            : 
    3542                 :            :         /*
    3543                 :            :          * Become the manager and destroy all workers.  This prevents
    3544                 :            :          * @pool's workers from blocking on attach_mutex.  We're the last
    3545                 :            :          * manager and @pool gets freed with the flag set.
    3546                 :            :          */
    3547                 :            :         spin_lock_irq(&pool->lock);
    3548                 :          0 :         wait_event_lock_irq(wq_manager_wait,
    3549                 :            :                             !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
    3550                 :          0 :         pool->flags |= POOL_MANAGER_ACTIVE;
    3551                 :            : 
    3552                 :          0 :         while ((worker = first_idle_worker(pool)))
    3553                 :          0 :                 destroy_worker(worker);
    3554                 :          0 :         WARN_ON(pool->nr_workers || pool->nr_idle);
    3555                 :            :         spin_unlock_irq(&pool->lock);
    3556                 :            : 
    3557                 :          0 :         mutex_lock(&wq_pool_attach_mutex);
    3558                 :          0 :         if (!list_empty(&pool->workers))
    3559                 :          0 :                 pool->detach_completion = &detach_completion;
    3560                 :          0 :         mutex_unlock(&wq_pool_attach_mutex);
    3561                 :            : 
    3562                 :          0 :         if (pool->detach_completion)
    3563                 :          0 :                 wait_for_completion(pool->detach_completion);
    3564                 :            : 
    3565                 :            :         /* shut down the timers */
    3566                 :          0 :         del_timer_sync(&pool->idle_timer);
    3567                 :          0 :         del_timer_sync(&pool->mayday_timer);
    3568                 :            : 
    3569                 :            :         /* RCU protected to allow dereferences from get_work_pool() */
    3570                 :          0 :         call_rcu(&pool->rcu, rcu_free_pool);
    3571                 :            : }
    3572                 :            : 
    3573                 :            : /**
    3574                 :            :  * get_unbound_pool - get a worker_pool with the specified attributes
    3575                 :            :  * @attrs: the attributes of the worker_pool to get
    3576                 :            :  *
    3577                 :            :  * Obtain a worker_pool which has the same attributes as @attrs, bump the
    3578                 :            :  * reference count and return it.  If there already is a matching
    3579                 :            :  * worker_pool, it will be used; otherwise, this function attempts to
    3580                 :            :  * create a new one.
    3581                 :            :  *
    3582                 :            :  * Should be called with wq_pool_mutex held.
    3583                 :            :  *
    3584                 :            :  * Return: On success, a worker_pool with the same attributes as @attrs.
    3585                 :            :  * On failure, %NULL.
    3586                 :            :  */
    3587                 :          3 : static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
    3588                 :            : {
    3589                 :          3 :         u32 hash = wqattrs_hash(attrs);
    3590                 :            :         struct worker_pool *pool;
    3591                 :            :         int node;
    3592                 :            :         int target_node = NUMA_NO_NODE;
    3593                 :            : 
    3594                 :            :         lockdep_assert_held(&wq_pool_mutex);
    3595                 :            : 
    3596                 :            :         /* do we already have a matching pool? */
    3597                 :          3 :         hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
    3598                 :          3 :                 if (wqattrs_equal(pool->attrs, attrs)) {
    3599                 :          3 :                         pool->refcnt++;
    3600                 :          3 :                         return pool;
    3601                 :            :                 }
    3602                 :            :         }
    3603                 :            : 
    3604                 :            :         /* if cpumask is contained inside a NUMA node, we belong to that node */
    3605                 :          3 :         if (wq_numa_enabled) {
    3606                 :          0 :                 for_each_node(node) {
    3607                 :          0 :                         if (cpumask_subset(attrs->cpumask,
    3608                 :          0 :                                            wq_numa_possible_cpumask[node])) {
    3609                 :          0 :                                 target_node = node;
    3610                 :          0 :                                 break;
    3611                 :            :                         }
    3612                 :            :                 }
    3613                 :            :         }
    3614                 :            : 
    3615                 :            :         /* nope, create a new one */
    3616                 :          3 :         pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
    3617                 :          3 :         if (!pool || init_worker_pool(pool) < 0)
    3618                 :            :                 goto fail;
    3619                 :            : 
    3620                 :            :         lockdep_set_subclass(&pool->lock, 1);    /* see put_pwq() */
    3621                 :          3 :         copy_workqueue_attrs(pool->attrs, attrs);
    3622                 :          3 :         pool->node = target_node;
    3623                 :            : 
    3624                 :            :         /*
    3625                 :            :          * no_numa isn't a worker_pool attribute, always clear it.  See
    3626                 :            :          * 'struct workqueue_attrs' comments for detail.
    3627                 :            :          */
    3628                 :          3 :         pool->attrs->no_numa = false;
    3629                 :            : 
    3630                 :          3 :         if (worker_pool_assign_id(pool) < 0)
    3631                 :            :                 goto fail;
    3632                 :            : 
    3633                 :            :         /* create and start the initial worker */
    3634                 :          3 :         if (wq_online && !create_worker(pool))
    3635                 :            :                 goto fail;
    3636                 :            : 
    3637                 :            :         /* install */
    3638                 :          3 :         hash_add(unbound_pool_hash, &pool->hash_node, hash);
    3639                 :            : 
    3640                 :          3 :         return pool;
    3641                 :            : fail:
    3642                 :          0 :         if (pool)
    3643                 :          0 :                 put_unbound_pool(pool);
    3644                 :            :         return NULL;
    3645                 :            : }
    3646                 :            : 
    3647                 :          2 : static void rcu_free_pwq(struct rcu_head *rcu)
    3648                 :            : {
    3649                 :          2 :         kmem_cache_free(pwq_cache,
    3650                 :          2 :                         container_of(rcu, struct pool_workqueue, rcu));
    3651                 :          2 : }
    3652                 :            : 
    3653                 :            : /*
    3654                 :            :  * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
    3655                 :            :  * and needs to be destroyed.
    3656                 :            :  */
    3657                 :          2 : static void pwq_unbound_release_workfn(struct work_struct *work)
    3658                 :            : {
    3659                 :            :         struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
    3660                 :            :                                                   unbound_release_work);
    3661                 :          2 :         struct workqueue_struct *wq = pwq->wq;
    3662                 :          2 :         struct worker_pool *pool = pwq->pool;
    3663                 :            :         bool is_last;
    3664                 :            : 
    3665                 :          2 :         if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
    3666                 :          2 :                 return;
    3667                 :            : 
    3668                 :          2 :         mutex_lock(&wq->mutex);
    3669                 :            :         list_del_rcu(&pwq->pwqs_node);
    3670                 :          2 :         is_last = list_empty(&wq->pwqs);
    3671                 :          2 :         mutex_unlock(&wq->mutex);
    3672                 :            : 
    3673                 :          2 :         mutex_lock(&wq_pool_mutex);
    3674                 :          2 :         put_unbound_pool(pool);
    3675                 :          2 :         mutex_unlock(&wq_pool_mutex);
    3676                 :            : 
    3677                 :          2 :         call_rcu(&pwq->rcu, rcu_free_pwq);
    3678                 :            : 
    3679                 :            :         /*
    3680                 :            :          * If we're the last pwq going away, @wq is already dead and no one
    3681                 :            :          * is gonna access it anymore.  Schedule RCU free.
    3682                 :            :          */
    3683                 :          2 :         if (is_last) {
    3684                 :            :                 wq_unregister_lockdep(wq);
    3685                 :          2 :                 call_rcu(&wq->rcu, rcu_free_wq);
    3686                 :            :         }
    3687                 :            : }
    3688                 :            : 
    3689                 :            : /**
    3690                 :            :  * pwq_adjust_max_active - update a pwq's max_active to the current setting
    3691                 :            :  * @pwq: target pool_workqueue
    3692                 :            :  *
    3693                 :            :  * If @pwq isn't freezing, set @pwq->max_active to the associated
    3694                 :            :  * workqueue's saved_max_active and activate delayed work items
    3695                 :            :  * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
    3696                 :            :  */
    3697                 :          3 : static void pwq_adjust_max_active(struct pool_workqueue *pwq)
    3698                 :            : {
    3699                 :          3 :         struct workqueue_struct *wq = pwq->wq;
    3700                 :          3 :         bool freezable = wq->flags & WQ_FREEZABLE;
    3701                 :            :         unsigned long flags;
    3702                 :            : 
    3703                 :            :         /* for @wq->saved_max_active */
    3704                 :            :         lockdep_assert_held(&wq->mutex);
    3705                 :            : 
    3706                 :            :         /* fast exit for non-freezable wqs */
    3707                 :          3 :         if (!freezable && pwq->max_active == wq->saved_max_active)
    3708                 :          3 :                 return;
    3709                 :            : 
    3710                 :            :         /* this function can be called during early boot w/ irq disabled */
    3711                 :          3 :         spin_lock_irqsave(&pwq->pool->lock, flags);
    3712                 :            : 
    3713                 :            :         /*
    3714                 :            :          * During [un]freezing, the caller is responsible for ensuring that
    3715                 :            :          * this function is called at least once after @workqueue_freezing
    3716                 :            :          * is updated and visible.
    3717                 :            :          */
    3718                 :          3 :         if (!freezable || !workqueue_freezing) {
    3719                 :          3 :                 pwq->max_active = wq->saved_max_active;
    3720                 :            : 
    3721                 :          3 :                 while (!list_empty(&pwq->delayed_works) &&
    3722                 :          0 :                        pwq->nr_active < pwq->max_active)
    3723                 :            :                         pwq_activate_first_delayed(pwq);
    3724                 :            : 
    3725                 :            :                 /*
    3726                 :            :                  * Need to kick a worker after the wq is thawed or an unbound
    3727                 :            :                  * wq's max_active is bumped.  It's a slow path.  Do it always.
    3728                 :            :                  */
    3729                 :          3 :                 wake_up_worker(pwq->pool);
    3730                 :            :         } else {
    3731                 :          0 :                 pwq->max_active = 0;
    3732                 :            :         }
    3733                 :            : 
    3734                 :          3 :         spin_unlock_irqrestore(&pwq->pool->lock, flags);
    3735                 :            : }
    3736                 :            : 
    3737                 :            : /* initialize newly alloced @pwq which is associated with @wq and @pool */
    3738                 :          3 : static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
    3739                 :            :                      struct worker_pool *pool)
    3740                 :            : {
    3741                 :          3 :         BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
    3742                 :            : 
    3743                 :          3 :         memset(pwq, 0, sizeof(*pwq));
    3744                 :            : 
    3745                 :          3 :         pwq->pool = pool;
    3746                 :          3 :         pwq->wq = wq;
    3747                 :          3 :         pwq->flush_color = -1;
    3748                 :          3 :         pwq->refcnt = 1;
    3749                 :          3 :         INIT_LIST_HEAD(&pwq->delayed_works);
    3750                 :          3 :         INIT_LIST_HEAD(&pwq->pwqs_node);
    3751                 :          3 :         INIT_LIST_HEAD(&pwq->mayday_node);
    3752                 :          3 :         INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
    3753                 :          3 : }
    3754                 :            : 
    3755                 :            : /* sync @pwq with the current state of its associated wq and link it */
    3756                 :          3 : static void link_pwq(struct pool_workqueue *pwq)
    3757                 :            : {
    3758                 :          3 :         struct workqueue_struct *wq = pwq->wq;
    3759                 :            : 
    3760                 :            :         lockdep_assert_held(&wq->mutex);
    3761                 :            : 
    3762                 :            :         /* may be called multiple times, ignore if already linked */
    3763                 :          3 :         if (!list_empty(&pwq->pwqs_node))
    3764                 :          3 :                 return;
    3765                 :            : 
    3766                 :            :         /* set the matching work_color */
    3767                 :          3 :         pwq->work_color = wq->work_color;
    3768                 :            : 
    3769                 :            :         /* sync max_active to the current setting */
    3770                 :          3 :         pwq_adjust_max_active(pwq);
    3771                 :            : 
    3772                 :            :         /* link in @pwq */
    3773                 :          3 :         list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
    3774                 :            : }
    3775                 :            : 
    3776                 :            : /* obtain a pool matching @attr and create a pwq associating the pool and @wq */
    3777                 :          3 : static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
    3778                 :            :                                         const struct workqueue_attrs *attrs)
    3779                 :            : {
    3780                 :            :         struct worker_pool *pool;
    3781                 :            :         struct pool_workqueue *pwq;
    3782                 :            : 
    3783                 :            :         lockdep_assert_held(&wq_pool_mutex);
    3784                 :            : 
    3785                 :          3 :         pool = get_unbound_pool(attrs);
    3786                 :          3 :         if (!pool)
    3787                 :            :                 return NULL;
    3788                 :            : 
    3789                 :          3 :         pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
    3790                 :          3 :         if (!pwq) {
    3791                 :          0 :                 put_unbound_pool(pool);
    3792                 :          0 :                 return NULL;
    3793                 :            :         }
    3794                 :            : 
    3795                 :          3 :         init_pwq(pwq, wq, pool);
    3796                 :          3 :         return pwq;
    3797                 :            : }
    3798                 :            : 
    3799                 :            : /**
    3800                 :            :  * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
    3801                 :            :  * @attrs: the wq_attrs of the default pwq of the target workqueue
    3802                 :            :  * @node: the target NUMA node
    3803                 :            :  * @cpu_going_down: if >= 0, the CPU to consider as offline
    3804                 :            :  * @cpumask: outarg, the resulting cpumask
    3805                 :            :  *
    3806                 :            :  * Calculate the cpumask a workqueue with @attrs should use on @node.  If
    3807                 :            :  * @cpu_going_down is >= 0, that cpu is considered offline during
    3808                 :            :  * calculation.  The result is stored in @cpumask.
    3809                 :            :  *
    3810                 :            :  * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
    3811                 :            :  * enabled and @node has online CPUs requested by @attrs, the returned
    3812                 :            :  * cpumask is the intersection of the possible CPUs of @node and
    3813                 :            :  * @attrs->cpumask.
    3814                 :            :  *
    3815                 :            :  * The caller is responsible for ensuring that the cpumask of @node stays
    3816                 :            :  * stable.
    3817                 :            :  *
    3818                 :            :  * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
    3819                 :            :  * %false if equal.
    3820                 :            :  */
    3821                 :          3 : static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
    3822                 :            :                                  int cpu_going_down, cpumask_t *cpumask)
    3823                 :            : {
    3824                 :          3 :         if (!wq_numa_enabled || attrs->no_numa)
    3825                 :            :                 goto use_dfl;
    3826                 :            : 
    3827                 :            :         /* does @node have any online CPUs @attrs wants? */
    3828                 :            :         cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
    3829                 :          0 :         if (cpu_going_down >= 0)
    3830                 :            :                 cpumask_clear_cpu(cpu_going_down, cpumask);
    3831                 :            : 
    3832                 :          0 :         if (cpumask_empty(cpumask))
    3833                 :            :                 goto use_dfl;
    3834                 :            : 
    3835                 :            :         /* yes, return possible CPUs in @node that @attrs wants */
    3836                 :          0 :         cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
    3837                 :            : 
    3838                 :          0 :         if (cpumask_empty(cpumask)) {
    3839                 :          0 :                 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
    3840                 :            :                                 "possible intersect\n");
    3841                 :            :                 return false;
    3842                 :            :         }
    3843                 :            : 
    3844                 :          0 :         return !cpumask_equal(cpumask, attrs->cpumask);
    3845                 :            : 
    3846                 :            : use_dfl:
    3847                 :            :         cpumask_copy(cpumask, attrs->cpumask);
    3848                 :          3 :         return false;
    3849                 :            : }
    3850                 :            : 
    3851                 :            : /* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
    3852                 :            : static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
    3853                 :            :                                                    int node,
    3854                 :            :                                                    struct pool_workqueue *pwq)
    3855                 :            : {
    3856                 :            :         struct pool_workqueue *old_pwq;
    3857                 :            : 
    3858                 :            :         lockdep_assert_held(&wq_pool_mutex);
    3859                 :            :         lockdep_assert_held(&wq->mutex);
    3860                 :            : 
    3861                 :            :         /* link_pwq() can handle duplicate calls */
    3862                 :          3 :         link_pwq(pwq);
    3863                 :            : 
    3864                 :          3 :         old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
    3865                 :          3 :         rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
    3866                 :            :         return old_pwq;
    3867                 :            : }
    3868                 :            : 
    3869                 :            : /* context to store the prepared attrs & pwqs before applying */
    3870                 :            : struct apply_wqattrs_ctx {
    3871                 :            :         struct workqueue_struct *wq;            /* target workqueue */
    3872                 :            :         struct workqueue_attrs  *attrs;         /* attrs to apply */
    3873                 :            :         struct list_head        list;           /* queued for batching commit */
    3874                 :            :         struct pool_workqueue   *dfl_pwq;
    3875                 :            :         struct pool_workqueue   *pwq_tbl[];
    3876                 :            : };
    3877                 :            : 
    3878                 :            : /* free the resources after success or abort */
    3879                 :          3 : static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
    3880                 :            : {
    3881                 :          3 :         if (ctx) {
    3882                 :            :                 int node;
    3883                 :            : 
    3884                 :          3 :                 for_each_node(node)
    3885                 :          3 :                         put_pwq_unlocked(ctx->pwq_tbl[node]);
    3886                 :          3 :                 put_pwq_unlocked(ctx->dfl_pwq);
    3887                 :            : 
    3888                 :          3 :                 free_workqueue_attrs(ctx->attrs);
    3889                 :            : 
    3890                 :          3 :                 kfree(ctx);
    3891                 :            :         }
    3892                 :          3 : }
    3893                 :            : 
    3894                 :            : /* allocate the attrs and pwqs for later installation */
    3895                 :            : static struct apply_wqattrs_ctx *
    3896                 :          3 : apply_wqattrs_prepare(struct workqueue_struct *wq,
    3897                 :            :                       const struct workqueue_attrs *attrs)
    3898                 :            : {
    3899                 :            :         struct apply_wqattrs_ctx *ctx;
    3900                 :            :         struct workqueue_attrs *new_attrs, *tmp_attrs;
    3901                 :            :         int node;
    3902                 :            : 
    3903                 :            :         lockdep_assert_held(&wq_pool_mutex);
    3904                 :            : 
    3905                 :          3 :         ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
    3906                 :            : 
    3907                 :          3 :         new_attrs = alloc_workqueue_attrs();
    3908                 :          3 :         tmp_attrs = alloc_workqueue_attrs();
    3909                 :          3 :         if (!ctx || !new_attrs || !tmp_attrs)
    3910                 :            :                 goto out_free;
    3911                 :            : 
    3912                 :            :         /*
    3913                 :            :          * Calculate the attrs of the default pwq.
    3914                 :            :          * If the user-configured cpumask doesn't overlap with the
    3915                 :            :          * wq_unbound_cpumask, we fall back to the wq_unbound_cpumask.
    3916                 :            :          */
    3917                 :            :         copy_workqueue_attrs(new_attrs, attrs);
    3918                 :            :         cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
    3919                 :          3 :         if (unlikely(cpumask_empty(new_attrs->cpumask)))
    3920                 :            :                 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
    3921                 :            : 
    3922                 :            :         /*
    3923                 :            :          * We may create multiple pwqs with differing cpumasks.  Make a
    3924                 :            :          * copy of @new_attrs which will be modified and used to obtain
    3925                 :            :          * pools.
    3926                 :            :          */
    3927                 :            :         copy_workqueue_attrs(tmp_attrs, new_attrs);
    3928                 :            : 
    3929                 :            :         /*
    3930                 :            :          * If something goes wrong during CPU up/down, we'll fall back to
    3931                 :            :          * the default pwq covering whole @attrs->cpumask.  Always create
    3932                 :            :          * it even if we don't use it immediately.
    3933                 :            :          */
    3934                 :          3 :         ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
    3935                 :          3 :         if (!ctx->dfl_pwq)
    3936                 :            :                 goto out_free;
    3937                 :            : 
    3938                 :          3 :         for_each_node(node) {
    3939                 :          3 :                 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
    3940                 :          0 :                         ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
    3941                 :          0 :                         if (!ctx->pwq_tbl[node])
    3942                 :            :                                 goto out_free;
    3943                 :            :                 } else {
    3944                 :          3 :                         ctx->dfl_pwq->refcnt++;
    3945                 :          3 :                         ctx->pwq_tbl[node] = ctx->dfl_pwq;
    3946                 :            :                 }
    3947                 :            :         }
    3948                 :            : 
    3949                 :            :         /* save the user-configured attrs and sanitize them. */
    3950                 :            :         copy_workqueue_attrs(new_attrs, attrs);
    3951                 :            :         cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
    3952                 :          3 :         ctx->attrs = new_attrs;
    3953                 :            : 
    3954                 :          3 :         ctx->wq = wq;
    3955                 :            :         free_workqueue_attrs(tmp_attrs);
    3956                 :          3 :         return ctx;
    3957                 :            : 
    3958                 :            : out_free:
    3959                 :            :         free_workqueue_attrs(tmp_attrs);
    3960                 :            :         free_workqueue_attrs(new_attrs);
    3961                 :          0 :         apply_wqattrs_cleanup(ctx);
    3962                 :          0 :         return NULL;
    3963                 :            : }
    3964                 :            : 
    3965                 :            : /* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
    3966                 :          3 : static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
    3967                 :            : {
    3968                 :            :         int node;
    3969                 :            : 
    3970                 :            :         /* all pwqs have been created successfully, let's install them */
    3971                 :          3 :         mutex_lock(&ctx->wq->mutex);
    3972                 :            : 
    3973                 :          3 :         copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
    3974                 :            : 
    3975                 :            :         /* save the previous pwq and install the new one */
    3976                 :          3 :         for_each_node(node)
    3977                 :          3 :                 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
    3978                 :            :                                                           ctx->pwq_tbl[node]);
    3979                 :            : 
    3980                 :            :         /* @dfl_pwq might not have been used, ensure it's linked */
    3981                 :          3 :         link_pwq(ctx->dfl_pwq);
    3982                 :          3 :         swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
    3983                 :            : 
    3984                 :          3 :         mutex_unlock(&ctx->wq->mutex);
    3985                 :          3 : }
    3986                 :            : 
    3987                 :            : static void apply_wqattrs_lock(void)
    3988                 :            : {
    3989                 :            :         /* CPUs should stay stable across pwq creations and installations */
    3990                 :            :         get_online_cpus();
    3991                 :          0 :         mutex_lock(&wq_pool_mutex);
    3992                 :            : }
    3993                 :            : 
    3994                 :            : static void apply_wqattrs_unlock(void)
    3995                 :            : {
    3996                 :          0 :         mutex_unlock(&wq_pool_mutex);
    3997                 :            :         put_online_cpus();
    3998                 :            : }
    3999                 :            : 
    4000                 :          3 : static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
    4001                 :            :                                         const struct workqueue_attrs *attrs)
    4002                 :            : {
    4003                 :            :         struct apply_wqattrs_ctx *ctx;
    4004                 :            : 
    4005                 :            :         /* only unbound workqueues can change attributes */
    4006                 :          3 :         if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
    4007                 :            :                 return -EINVAL;
    4008                 :            : 
    4009                 :            :         /* creating multiple pwqs breaks ordering guarantee */
    4010                 :          3 :         if (!list_empty(&wq->pwqs)) {
    4011                 :          0 :                 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
    4012                 :            :                         return -EINVAL;
    4013                 :            : 
    4014                 :          0 :                 wq->flags &= ~__WQ_ORDERED;
    4015                 :            :         }
    4016                 :            : 
    4017                 :          3 :         ctx = apply_wqattrs_prepare(wq, attrs);
    4018                 :          3 :         if (!ctx)
    4019                 :            :                 return -ENOMEM;
    4020                 :            : 
    4021                 :            :         /* the ctx has been prepared successfully, let's commit it */
    4022                 :          3 :         apply_wqattrs_commit(ctx);
    4023                 :          3 :         apply_wqattrs_cleanup(ctx);
    4024                 :            : 
    4025                 :          3 :         return 0;
    4026                 :            : }
    4027                 :            : 
    4028                 :            : /**
    4029                 :            :  * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
    4030                 :            :  * @wq: the target workqueue
    4031                 :            :  * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
    4032                 :            :  *
    4033                 :            :  * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
    4034                 :            :  * machines, this function maps a separate pwq to each NUMA node with
    4035                 :            :  * possible CPUs in @attrs->cpumask so that work items are affine to the
    4036                 :            :  * NUMA node they were issued on.  Older pwqs are released as in-flight work
    4037                 :            :  * items finish.  Note that a work item which repeatedly requeues itself
    4038                 :            :  * back-to-back will stay on its current pwq.
    4039                 :            :  *
    4040                 :            :  * Performs GFP_KERNEL allocations.
    4041                 :            :  *
    4042                 :            :  * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
    4043                 :            :  *
    4044                 :            :  * Return: 0 on success and -errno on failure.
    4045                 :            :  */
    4046                 :          3 : int apply_workqueue_attrs(struct workqueue_struct *wq,
    4047                 :            :                           const struct workqueue_attrs *attrs)
    4048                 :            : {
    4049                 :            :         int ret;
    4050                 :            : 
    4051                 :            :         lockdep_assert_cpus_held();
    4052                 :            : 
    4053                 :          3 :         mutex_lock(&wq_pool_mutex);
    4054                 :          3 :         ret = apply_workqueue_attrs_locked(wq, attrs);
    4055                 :          3 :         mutex_unlock(&wq_pool_mutex);
    4056                 :            : 
    4057                 :          3 :         return ret;
    4058                 :            : }
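/*
 * Editor's illustrative sketch (not part of workqueue.c): one way an
 * in-kernel caller might build attrs and apply them to an unbound
 * workqueue.  The function name, the nice value and the CPU choice are
 * assumptions for illustration only.
 */
static int example_apply_attrs(struct workqueue_struct *unbound_wq)
{
        struct workqueue_attrs *attrs;
        int ret;

        attrs = alloc_workqueue_attrs();
        if (!attrs)
                return -ENOMEM;

        attrs->nice = -5;                             /* run the workers at a higher priority */
        cpumask_copy(attrs->cpumask, cpumask_of(0));  /* restrict work to CPU 0 */

        get_online_cpus();                            /* CPU hotplug read exclusion required by apply_workqueue_attrs() */
        ret = apply_workqueue_attrs(unbound_wq, attrs);
        put_online_cpus();

        free_workqueue_attrs(attrs);
        return ret;
}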
    4059                 :            : 
    4060                 :            : /**
    4061                 :            :  * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
    4062                 :            :  * @wq: the target workqueue
    4063                 :            :  * @cpu: the CPU coming up or going down
    4064                 :            :  * @online: whether @cpu is coming up or going down
    4065                 :            :  *
    4066                 :            :  * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
    4067                 :            :  * %CPU_DOWN_FAILED.  @cpu is being hot[un]plugged, update NUMA affinity of
    4068                 :            :  * @wq accordingly.
    4069                 :            :  *
    4070                 :            :  * If NUMA affinity can't be adjusted due to memory allocation failure, it
    4071                 :            :  * falls back to @wq->dfl_pwq which may not be optimal but is always
    4072                 :            :  * correct.
    4073                 :            :  *
    4074                 :            :  * Note that when the last allowed CPU of a NUMA node goes offline for a
    4075                 :            :  * workqueue with a cpumask spanning multiple nodes, the workers which were
    4076                 :            :  * already executing the work items for the workqueue will lose their CPU
    4077                 :            :  * affinity and may execute on any CPU.  This is similar to how per-cpu
    4078                 :            :  * workqueues behave on CPU_DOWN.  If a workqueue user wants strict
    4079                 :            :  * affinity, it's the user's responsibility to flush the work item from
    4080                 :            :  * CPU_DOWN_PREPARE.
    4081                 :            :  */
    4082                 :          3 : static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
    4083                 :            :                                    bool online)
    4084                 :            : {
    4085                 :            :         int node = cpu_to_node(cpu);
    4086                 :          3 :         int cpu_off = online ? -1 : cpu;
    4087                 :            :         struct pool_workqueue *old_pwq = NULL, *pwq;
    4088                 :            :         struct workqueue_attrs *target_attrs;
    4089                 :            :         cpumask_t *cpumask;
    4090                 :            : 
    4091                 :            :         lockdep_assert_held(&wq_pool_mutex);
    4092                 :            : 
    4093                 :          3 :         if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
    4094                 :          0 :             wq->unbound_attrs->no_numa)
    4095                 :            :                 return;
    4096                 :            : 
    4097                 :            :         /*
    4098                 :            :          * We don't want to alloc/free wq_attrs for each wq for each CPU.
    4099                 :            :          * Let's use a preallocated one.  The following buf is protected by
    4100                 :            :          * CPU hotplug exclusion.
    4101                 :            :          */
    4102                 :          0 :         target_attrs = wq_update_unbound_numa_attrs_buf;
    4103                 :          0 :         cpumask = target_attrs->cpumask;
    4104                 :            : 
    4105                 :            :         copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
    4106                 :            :         pwq = unbound_pwq_by_node(wq, node);
    4107                 :            : 
    4108                 :            :         /*
    4109                 :            :          * Let's determine what needs to be done.  If the target cpumask is
    4110                 :            :          * different from the default pwq's, we need to compare it to @pwq's
    4111                 :            :          * and create a new one if they don't match.  If the target cpumask
    4112                 :            :          * equals the default pwq's, the default pwq should be used.
    4113                 :            :          */
    4114                 :          0 :         if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
    4115                 :          0 :                 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
    4116                 :            :                         return;
    4117                 :            :         } else {
    4118                 :            :                 goto use_dfl_pwq;
    4119                 :            :         }
    4120                 :            : 
    4121                 :            :         /* create a new pwq */
    4122                 :          0 :         pwq = alloc_unbound_pwq(wq, target_attrs);
    4123                 :          0 :         if (!pwq) {
    4124                 :          0 :                 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
    4125                 :            :                         wq->name);
    4126                 :          0 :                 goto use_dfl_pwq;
    4127                 :            :         }
    4128                 :            : 
    4129                 :            :         /* Install the new pwq. */
    4130                 :          0 :         mutex_lock(&wq->mutex);
    4131                 :            :         old_pwq = numa_pwq_tbl_install(wq, node, pwq);
    4132                 :          0 :         goto out_unlock;
    4133                 :            : 
    4134                 :            : use_dfl_pwq:
    4135                 :          0 :         mutex_lock(&wq->mutex);
    4136                 :          0 :         spin_lock_irq(&wq->dfl_pwq->pool->lock);
    4137                 :          0 :         get_pwq(wq->dfl_pwq);
    4138                 :          0 :         spin_unlock_irq(&wq->dfl_pwq->pool->lock);
    4139                 :          0 :         old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
    4140                 :            : out_unlock:
    4141                 :          0 :         mutex_unlock(&wq->mutex);
    4142                 :          0 :         put_pwq_unlocked(old_pwq);
    4143                 :            : }
    4144                 :            : 
    4145                 :          3 : static int alloc_and_link_pwqs(struct workqueue_struct *wq)
    4146                 :            : {
    4147                 :          3 :         bool highpri = wq->flags & WQ_HIGHPRI;
    4148                 :            :         int cpu, ret;
    4149                 :            : 
    4150                 :          3 :         if (!(wq->flags & WQ_UNBOUND)) {
    4151                 :          3 :                 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
    4152                 :          3 :                 if (!wq->cpu_pwqs)
    4153                 :            :                         return -ENOMEM;
    4154                 :            : 
    4155                 :          3 :                 for_each_possible_cpu(cpu) {
    4156                 :            :                         struct pool_workqueue *pwq =
    4157                 :          3 :                                 per_cpu_ptr(wq->cpu_pwqs, cpu);
    4158                 :            :                         struct worker_pool *cpu_pools =
    4159                 :          3 :                                 per_cpu(cpu_worker_pools, cpu);
    4160                 :            : 
    4161                 :          3 :                         init_pwq(pwq, wq, &cpu_pools[highpri]);
    4162                 :            : 
    4163                 :          3 :                         mutex_lock(&wq->mutex);
    4164                 :          3 :                         link_pwq(pwq);
    4165                 :          3 :                         mutex_unlock(&wq->mutex);
    4166                 :            :                 }
    4167                 :            :                 return 0;
    4168                 :            :         }
    4169                 :            : 
    4170                 :            :         get_online_cpus();
    4171                 :          3 :         if (wq->flags & __WQ_ORDERED) {
    4172                 :          3 :                 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
    4173                 :            :                 /* there should only be a single pwq for the ordering guarantee */
    4174                 :          3 :                 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
    4175                 :            :                               wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
    4176                 :            :                      "ordering guarantee broken for workqueue %s\n", wq->name);
    4177                 :            :         } else {
    4178                 :          3 :                 ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
    4179                 :            :         }
    4180                 :            :         put_online_cpus();
    4181                 :            : 
    4182                 :          3 :         return ret;
    4183                 :            : }
    4184                 :            : 
    4185                 :          3 : static int wq_clamp_max_active(int max_active, unsigned int flags,
    4186                 :            :                                const char *name)
    4187                 :            : {
    4188                 :          3 :         int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
    4189                 :            : 
    4190                 :          3 :         if (max_active < 1 || max_active > lim)
    4191                 :          0 :                 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
    4192                 :            :                         max_active, name, 1, lim);
    4193                 :            : 
    4194                 :          3 :         return clamp_val(max_active, 1, lim);
    4195                 :            : }
    4196                 :            : 
    4197                 :            : /*
    4198                 :            :  * Workqueues which may be used during memory reclaim should have a rescuer
    4199                 :            :  * to guarantee forward progress.
    4200                 :            :  */
    4201                 :          3 : static int init_rescuer(struct workqueue_struct *wq)
    4202                 :            : {
    4203                 :            :         struct worker *rescuer;
    4204                 :            :         int ret;
    4205                 :            : 
    4206                 :          3 :         if (!(wq->flags & WQ_MEM_RECLAIM))
    4207                 :            :                 return 0;
    4208                 :            : 
    4209                 :          3 :         rescuer = alloc_worker(NUMA_NO_NODE);
    4210                 :          3 :         if (!rescuer)
    4211                 :            :                 return -ENOMEM;
    4212                 :            : 
    4213                 :          3 :         rescuer->rescue_wq = wq;
    4214                 :          3 :         rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
    4215                 :            :         ret = PTR_ERR_OR_ZERO(rescuer->task);
    4216                 :          3 :         if (ret) {
    4217                 :          0 :                 kfree(rescuer);
    4218                 :          0 :                 return ret;
    4219                 :            :         }
    4220                 :            : 
    4221                 :          3 :         wq->rescuer = rescuer;
    4222                 :          3 :         kthread_bind_mask(rescuer->task, cpu_possible_mask);
    4223                 :          3 :         wake_up_process(rescuer->task);
    4224                 :            : 
    4225                 :          3 :         return 0;
    4226                 :            : }
    4227                 :            : 
    4228                 :            : __printf(1, 4)
    4229                 :          3 : struct workqueue_struct *alloc_workqueue(const char *fmt,
    4230                 :            :                                          unsigned int flags,
    4231                 :            :                                          int max_active, ...)
    4232                 :            : {
    4233                 :            :         size_t tbl_size = 0;
    4234                 :            :         va_list args;
    4235                 :            :         struct workqueue_struct *wq;
    4236                 :            :         struct pool_workqueue *pwq;
    4237                 :            : 
    4238                 :            :         /*
    4239                 :            :          * Unbound && max_active == 1 used to imply ordered, which is no
    4240                 :            :          * longer the case on NUMA machines due to per-node pools.  While
    4241                 :            :          * alloc_ordered_workqueue() is the right way to create an ordered
    4242                 :            :          * workqueue, keep the previous behavior to avoid subtle breakages
    4243                 :            :          * on NUMA.
    4244                 :            :          */
    4245                 :          3 :         if ((flags & WQ_UNBOUND) && max_active == 1)
    4246                 :          3 :                 flags |= __WQ_ORDERED;
    4247                 :            : 
    4248                 :            :         /* see the comment above the definition of WQ_POWER_EFFICIENT */
    4249                 :          3 :         if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
    4250                 :          0 :                 flags |= WQ_UNBOUND;
    4251                 :            : 
    4252                 :            :         /* allocate wq and format name */
    4253                 :          3 :         if (flags & WQ_UNBOUND)
    4254                 :            :                 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
    4255                 :            : 
    4256                 :          3 :         wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
    4257                 :          3 :         if (!wq)
    4258                 :            :                 return NULL;
    4259                 :            : 
    4260                 :          3 :         if (flags & WQ_UNBOUND) {
    4261                 :          3 :                 wq->unbound_attrs = alloc_workqueue_attrs();
    4262                 :          3 :                 if (!wq->unbound_attrs)
    4263                 :            :                         goto err_free_wq;
    4264                 :            :         }
    4265                 :            : 
    4266                 :          3 :         va_start(args, max_active);
    4267                 :          3 :         vsnprintf(wq->name, sizeof(wq->name), fmt, args);
    4268                 :          3 :         va_end(args);
    4269                 :            : 
    4270                 :          3 :         max_active = max_active ?: WQ_DFL_ACTIVE;
    4271                 :          3 :         max_active = wq_clamp_max_active(max_active, flags, wq->name);
    4272                 :            : 
    4273                 :            :         /* init wq */
    4274                 :          3 :         wq->flags = flags;
    4275                 :          3 :         wq->saved_max_active = max_active;
    4276                 :          3 :         mutex_init(&wq->mutex);
    4277                 :            :         atomic_set(&wq->nr_pwqs_to_flush, 0);
    4278                 :          3 :         INIT_LIST_HEAD(&wq->pwqs);
    4279                 :          3 :         INIT_LIST_HEAD(&wq->flusher_queue);
    4280                 :          3 :         INIT_LIST_HEAD(&wq->flusher_overflow);
    4281                 :          3 :         INIT_LIST_HEAD(&wq->maydays);
    4282                 :            : 
    4283                 :            :         wq_init_lockdep(wq);
    4284                 :          3 :         INIT_LIST_HEAD(&wq->list);
    4285                 :            : 
    4286                 :          3 :         if (alloc_and_link_pwqs(wq) < 0)
    4287                 :            :                 goto err_unreg_lockdep;
    4288                 :            : 
    4289                 :          3 :         if (wq_online && init_rescuer(wq) < 0)
    4290                 :            :                 goto err_destroy;
    4291                 :            : 
    4292                 :          3 :         if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
    4293                 :            :                 goto err_destroy;
    4294                 :            : 
    4295                 :            :         /*
    4296                 :            :          * wq_pool_mutex protects global freeze state and workqueues list.
    4297                 :            :          * Grab it, adjust max_active and add the new @wq to workqueues
    4298                 :            :          * list.
    4299                 :            :          */
    4300                 :          3 :         mutex_lock(&wq_pool_mutex);
    4301                 :            : 
    4302                 :          3 :         mutex_lock(&wq->mutex);
    4303                 :          3 :         for_each_pwq(pwq, wq)
    4304                 :          3 :                 pwq_adjust_max_active(pwq);
    4305                 :          3 :         mutex_unlock(&wq->mutex);
    4306                 :            : 
    4307                 :            :         list_add_tail_rcu(&wq->list, &workqueues);
    4308                 :            : 
    4309                 :          3 :         mutex_unlock(&wq_pool_mutex);
    4310                 :            : 
    4311                 :          3 :         return wq;
    4312                 :            : 
    4313                 :            : err_unreg_lockdep:
    4314                 :            :         wq_unregister_lockdep(wq);
    4315                 :            :         wq_free_lockdep(wq);
    4316                 :            : err_free_wq:
    4317                 :          0 :         free_workqueue_attrs(wq->unbound_attrs);
    4318                 :          0 :         kfree(wq);
    4319                 :          0 :         return NULL;
    4320                 :            : err_destroy:
    4321                 :          0 :         destroy_workqueue(wq);
    4322                 :          0 :         return NULL;
    4323                 :            : }
    4324                 :            : EXPORT_SYMBOL_GPL(alloc_workqueue);
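/*
 * Editor's illustrative sketch (not part of workqueue.c): a typical
 * driver-side allocation.  The "example_*" names and the flag choices are
 * assumptions; <linux/workqueue.h>, already included above, provides the
 * declarations used here.
 */
static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static void example_work_fn(struct work_struct *work)
{
        /* runs in process context on one of the shared worker pools */
}

static int __init example_init(void)
{
        /* WQ_MEM_RECLAIM attaches a rescuer thread, cf. init_rescuer() above */
        example_wq = alloc_workqueue("example_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
        if (!example_wq)
                return -ENOMEM;

        INIT_WORK(&example_work, example_work_fn);
        queue_work(example_wq, &example_work);
        return 0;
}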
    4325                 :            : 
    4326                 :            : /**
    4327                 :            :  * destroy_workqueue - safely terminate a workqueue
    4328                 :            :  * @wq: target workqueue
    4329                 :            :  *
    4330                 :            :  * Safely destroy a workqueue. All work currently pending will be done first.
    4331                 :            :  */
    4332                 :          2 : void destroy_workqueue(struct workqueue_struct *wq)
    4333                 :            : {
    4334                 :            :         struct pool_workqueue *pwq;
    4335                 :            :         int node;
    4336                 :            : 
    4337                 :            :         /*
    4338                 :            :          * Remove it from sysfs first so that sanity check failure doesn't
    4339                 :            :          * lead to sysfs name conflicts.
    4340                 :            :          */
    4341                 :            :         workqueue_sysfs_unregister(wq);
    4342                 :            : 
    4343                 :            :         /* drain it before proceeding with destruction */
    4344                 :          2 :         drain_workqueue(wq);
    4345                 :            : 
    4346                 :            :         /* kill the rescuer; if sanity checks fail, leave @wq without a rescuer */
    4347                 :          2 :         if (wq->rescuer) {
    4348                 :            :                 struct worker *rescuer = wq->rescuer;
    4349                 :            : 
    4350                 :            :                 /* this prevents new queueing */
    4351                 :            :                 spin_lock_irq(&wq_mayday_lock);
    4352                 :          2 :                 wq->rescuer = NULL;
    4353                 :            :                 spin_unlock_irq(&wq_mayday_lock);
    4354                 :            : 
    4355                 :            :                 /* rescuer will empty maydays list before exiting */
    4356                 :          2 :                 kthread_stop(rescuer->task);
    4357                 :          2 :                 kfree(rescuer);
    4358                 :            :         }
    4359                 :            : 
    4360                 :            :         /* sanity checks */
    4361                 :          2 :         mutex_lock(&wq->mutex);
    4362                 :          2 :         for_each_pwq(pwq, wq) {
    4363                 :            :                 int i;
    4364                 :            : 
    4365                 :          2 :                 for (i = 0; i < WORK_NR_COLORS; i++) {
    4366                 :          2 :                         if (WARN_ON(pwq->nr_in_flight[i])) {
    4367                 :          0 :                                 mutex_unlock(&wq->mutex);
    4368                 :          0 :                                 show_workqueue_state();
    4369                 :          0 :                                 return;
    4370                 :            :                         }
    4371                 :            :                 }
    4372                 :            : 
    4373                 :          2 :                 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
    4374                 :          2 :                     WARN_ON(pwq->nr_active) ||
    4375                 :          2 :                     WARN_ON(!list_empty(&pwq->delayed_works))) {
    4376                 :          0 :                         mutex_unlock(&wq->mutex);
    4377                 :          0 :                         show_workqueue_state();
    4378                 :          0 :                         return;
    4379                 :            :                 }
    4380                 :            :         }
    4381                 :          2 :         mutex_unlock(&wq->mutex);
    4382                 :            : 
    4383                 :            :         /*
    4384                 :            :          * wq list is used to freeze wq, remove from list after
    4385                 :            :          * flushing is complete in case freeze races us.
    4386                 :            :          */
    4387                 :          2 :         mutex_lock(&wq_pool_mutex);
    4388                 :            :         list_del_rcu(&wq->list);
    4389                 :          2 :         mutex_unlock(&wq_pool_mutex);
    4390                 :            : 
    4391                 :          2 :         if (!(wq->flags & WQ_UNBOUND)) {
    4392                 :            :                 wq_unregister_lockdep(wq);
    4393                 :            :                 /*
    4394                 :            :                  * The base ref is never dropped on per-cpu pwqs.  Directly
    4395                 :            :                  * schedule RCU free.
    4396                 :            :                  */
    4397                 :          0 :                 call_rcu(&wq->rcu, rcu_free_wq);
    4398                 :            :         } else {
    4399                 :            :                 /*
    4400                 :            :                  * We're the sole accessor of @wq at this point.  Directly
    4401                 :            :                  * access numa_pwq_tbl[] and dfl_pwq to put the base refs.
    4402                 :            :                  * @wq will be freed when the last pwq is released.
    4403                 :            :                  */
    4404                 :          2 :                 for_each_node(node) {
    4405                 :          2 :                         pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
    4406                 :            :                         RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
    4407                 :          2 :                         put_pwq_unlocked(pwq);
    4408                 :            :                 }
    4409                 :            : 
    4410                 :            :                 /*
    4411                 :            :                  * Put dfl_pwq.  @wq may be freed any time after dfl_pwq is
    4412                 :            :                  * put.  Don't access it afterwards.
    4413                 :            :                  */
    4414                 :          2 :                 pwq = wq->dfl_pwq;
    4415                 :          2 :                 wq->dfl_pwq = NULL;
    4416                 :          2 :                 put_pwq_unlocked(pwq);
    4417                 :            :         }
    4418                 :            : }
    4419                 :            : EXPORT_SYMBOL_GPL(destroy_workqueue);
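/*
 * Editor's illustrative teardown sketch (not part of workqueue.c): cancel
 * any self-requeueing work first, then destroy; destroy_workqueue() drains
 * the remaining work items itself.  example_wq/example_work are the
 * hypothetical objects from the allocation sketch above.
 */
static void example_exit(void)
{
        cancel_work_sync(&example_work);
        destroy_workqueue(example_wq);
        example_wq = NULL;
}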
    4420                 :            : 
    4421                 :            : /**
    4422                 :            :  * workqueue_set_max_active - adjust max_active of a workqueue
    4423                 :            :  * @wq: target workqueue
    4424                 :            :  * @max_active: new max_active value.
    4425                 :            :  *
    4426                 :            :  * Set max_active of @wq to @max_active.
    4427                 :            :  *
    4428                 :            :  * CONTEXT:
    4429                 :            :  * Don't call from IRQ context.
    4430                 :            :  */
    4431                 :          0 : void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
    4432                 :            : {
    4433                 :            :         struct pool_workqueue *pwq;
    4434                 :            : 
    4435                 :            :         /* disallow meddling with max_active for ordered workqueues */
    4436                 :          0 :         if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
    4437                 :          0 :                 return;
    4438                 :            : 
    4439                 :          0 :         max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
    4440                 :            : 
    4441                 :          0 :         mutex_lock(&wq->mutex);
    4442                 :            : 
    4443                 :          0 :         wq->flags &= ~__WQ_ORDERED;
    4444                 :          0 :         wq->saved_max_active = max_active;
    4445                 :            : 
    4446                 :          0 :         for_each_pwq(pwq, wq)
    4447                 :          0 :                 pwq_adjust_max_active(pwq);
    4448                 :            : 
    4449                 :          0 :         mutex_unlock(&wq->mutex);
    4450                 :            : }
    4451                 :            : EXPORT_SYMBOL_GPL(workqueue_set_max_active);
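/*
 * Editor's illustrative sketch (not part of workqueue.c): raising the
 * concurrency limit of a non-ordered workqueue at run time.  The value 16
 * and example_wq are assumptions.
 */
static void example_raise_concurrency(void)
{
        workqueue_set_max_active(example_wq, 16);  /* allow up to 16 in-flight work items per pwq */
}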
    4452                 :            : 
    4453                 :            : /**
    4454                 :            :  * current_work - retrieve %current task's work struct
    4455                 :            :  *
    4456                 :            :  * Determine if %current task is a workqueue worker and what it's working on.
    4457                 :            :  * Useful to find out the context that the %current task is running in.
    4458                 :            :  *
    4459                 :            :  * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
    4460                 :            :  */
    4461                 :          0 : struct work_struct *current_work(void)
    4462                 :            : {
    4463                 :          0 :         struct worker *worker = current_wq_worker();
    4464                 :            : 
    4465                 :          0 :         return worker ? worker->current_work : NULL;
    4466                 :            : }
    4467                 :            : EXPORT_SYMBOL(current_work);
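/*
 * Editor's illustrative sketch (not part of workqueue.c): a helper can ask
 * whether it was reached from a particular work item.  example_work is the
 * hypothetical item from the allocation sketch above.
 */
static bool example_called_from_my_work(void)
{
        return current_work() == &example_work;
}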
    4468                 :            : 
    4469                 :            : /**
    4470                 :            :  * current_is_workqueue_rescuer - is %current workqueue rescuer?
    4471                 :            :  *
    4472                 :            :  * Determine whether %current is a workqueue rescuer.  Can be used from
    4473                 :            :  * work functions to determine whether it's being run off the rescuer task.
    4474                 :            :  *
    4475                 :            :  * Return: %true if %current is a workqueue rescuer. %false otherwise.
    4476                 :            :  */
    4477                 :          3 : bool current_is_workqueue_rescuer(void)
    4478                 :            : {
    4479                 :          3 :         struct worker *worker = current_wq_worker();
    4480                 :            : 
    4481                 :          3 :         return worker && worker->rescue_wq;
    4482                 :            : }
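/*
 * Editor's illustrative sketch (not part of workqueue.c): a work function
 * on a WQ_MEM_RECLAIM queue can skip optional extras when the rescuer is
 * executing it under memory pressure.  The function and the split between
 * "optional" and "essential" work are assumptions.
 */
static void example_reclaim_work_fn(struct work_struct *work)
{
        if (!current_is_workqueue_rescuer()) {
                /* optional housekeeping that may allocate or block */
        }
        /* essential part that must always make forward progress */
}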
    4483                 :            : 
    4484                 :            : /**
    4485                 :            :  * workqueue_congested - test whether a workqueue is congested
    4486                 :            :  * @cpu: CPU in question
    4487                 :            :  * @wq: target workqueue
    4488                 :            :  *
    4489                 :            :  * Test whether @wq's cpu workqueue for @cpu is congested.  There is
    4490                 :            :  * no synchronization around this function and the test result is
    4491                 :            :  * unreliable and only useful as advisory hints or for debugging.
    4492                 :            :  *
    4493                 :            :  * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
    4494                 :            :  * Note that both per-cpu and unbound workqueues may be associated with
    4495                 :            :  * multiple pool_workqueues which have separate congested states.  A
    4496                 :            :  * workqueue being congested on one CPU doesn't mean the workqueue is also
    4497                 :            :  * congested on other CPUs / NUMA nodes.
    4498                 :            :  *
    4499                 :            :  * Return:
    4500                 :            :  * %true if congested, %false otherwise.
    4501                 :            :  */
    4502                 :          0 : bool workqueue_congested(int cpu, struct workqueue_struct *wq)
    4503                 :            : {
    4504                 :            :         struct pool_workqueue *pwq;
    4505                 :            :         bool ret;
    4506                 :            : 
    4507                 :            :         rcu_read_lock();
    4508                 :          0 :         preempt_disable();
    4509                 :            : 
    4510                 :          0 :         if (cpu == WORK_CPU_UNBOUND)
    4511                 :          0 :                 cpu = smp_processor_id();
    4512                 :            : 
    4513                 :          0 :         if (!(wq->flags & WQ_UNBOUND))
    4514                 :          0 :                 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
    4515                 :            :         else
    4516                 :            :                 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
    4517                 :            : 
    4518                 :          0 :         ret = !list_empty(&pwq->delayed_works);
    4519                 :          0 :         preempt_enable();
    4520                 :            :         rcu_read_unlock();
    4521                 :            : 
    4522                 :          0 :         return ret;
    4523                 :            : }
    4524                 :            : EXPORT_SYMBOL_GPL(workqueue_congested);
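/*
 * Editor's illustrative sketch (not part of workqueue.c): using the purely
 * advisory congestion test to skip optional background work.  example_wq
 * and example_work are the hypothetical objects from the sketch above.
 */
static void example_maybe_queue_background(void)
{
        if (!workqueue_congested(WORK_CPU_UNBOUND, example_wq))
                queue_work(example_wq, &example_work);
}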
    4525                 :            : 
    4526                 :            : /**
    4527                 :            :  * work_busy - test whether a work is currently pending or running
    4528                 :            :  * @work: the work to be tested
    4529                 :            :  *
    4530                 :            :  * Test whether @work is currently pending or running.  There is no
    4531                 :            :  * synchronization around this function and the test result is
    4532                 :            :  * unreliable and only useful as advisory hints or for debugging.
    4533                 :            :  *
    4534                 :            :  * Return:
    4535                 :            :  * OR'd bitmask of WORK_BUSY_* bits.
    4536                 :            :  */
    4537                 :          0 : unsigned int work_busy(struct work_struct *work)
    4538                 :            : {
    4539                 :            :         struct worker_pool *pool;
    4540                 :            :         unsigned long flags;
    4541                 :            :         unsigned int ret = 0;
    4542                 :            : 
    4543                 :          0 :         if (work_pending(work))
    4544                 :            :                 ret |= WORK_BUSY_PENDING;
    4545                 :            : 
    4546                 :            :         rcu_read_lock();
    4547                 :          0 :         pool = get_work_pool(work);
    4548                 :          0 :         if (pool) {
    4549                 :          0 :                 spin_lock_irqsave(&pool->lock, flags);
    4550                 :          0 :                 if (find_worker_executing_work(pool, work))
    4551                 :          0 :                         ret |= WORK_BUSY_RUNNING;
    4552                 :            :                 spin_unlock_irqrestore(&pool->lock, flags);
    4553                 :            :         }
    4554                 :            :         rcu_read_unlock();
    4555                 :            : 
    4556                 :          0 :         return ret;
    4557                 :            : }
    4558                 :            : EXPORT_SYMBOL_GPL(work_busy);
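/*
 * Editor's illustrative sketch (not part of workqueue.c): an advisory dump
 * of a work item's state, e.g. from a debug interface.  example_work is
 * the hypothetical item from the allocation sketch above.
 */
static void example_report_work_state(void)
{
        unsigned int busy = work_busy(&example_work);

        pr_info("example_work: pending=%d running=%d\n",
                !!(busy & WORK_BUSY_PENDING), !!(busy & WORK_BUSY_RUNNING));
}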
    4559                 :            : 
    4560                 :            : /**
    4561                 :            :  * set_worker_desc - set description for the current work item
    4562                 :            :  * @fmt: printf-style format string
    4563                 :            :  * @...: arguments for the format string
    4564                 :            :  *
    4565                 :            :  * This function can be called by a running work function to describe what
    4566                 :            :  * the work item is about.  If the worker task gets dumped, this
    4567                 :            :  * information will be printed out together with the dump to help debugging.  The
    4568                 :            :  * description can be at most WORKER_DESC_LEN including the trailing '\0'.
    4569                 :            :  */
    4570                 :          3 : void set_worker_desc(const char *fmt, ...)
    4571                 :            : {
    4572                 :          3 :         struct worker *worker = current_wq_worker();
    4573                 :            :         va_list args;
    4574                 :            : 
    4575                 :          3 :         if (worker) {
    4576                 :          3 :                 va_start(args, fmt);
    4577                 :          3 :                 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
    4578                 :          3 :                 va_end(args);
    4579                 :            :         }
    4580                 :          3 : }
    4581                 :            : EXPORT_SYMBOL_GPL(set_worker_desc);
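/*
 * Editor's illustrative sketch (not part of workqueue.c): a work function
 * tagging itself so the description shows up in worker dumps produced by
 * print_worker_info() below.  The format string and the id are assumptions.
 */
static void example_described_work_fn(struct work_struct *work)
{
        int id = 0;  /* hypothetical device id */

        set_worker_desc("example dev %d", id);
        /* ... do the actual work ... */
}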
    4582                 :            : 
    4583                 :            : /**
    4584                 :            :  * print_worker_info - print out worker information and description
    4585                 :            :  * @log_lvl: the log level to use when printing
    4586                 :            :  * @task: target task
    4587                 :            :  *
    4588                 :            :  * If @task is a worker and currently executing a work item, print out the
    4589                 :            :  * name of the workqueue being serviced and worker description set with
    4590                 :            :  * set_worker_desc() by the currently executing work item.
    4591                 :            :  *
    4592                 :            :  * This function can be safely called on any task as long as the
    4593                 :            :  * task_struct itself is accessible.  While safe, this function isn't
    4594                 :            :  * synchronized and may print out mixed-up or garbage data of limited length.
    4595                 :            :  */
    4596                 :          1 : void print_worker_info(const char *log_lvl, struct task_struct *task)
    4597                 :            : {
    4598                 :          1 :         work_func_t *fn = NULL;
    4599                 :          1 :         char name[WQ_NAME_LEN] = { };
    4600                 :          1 :         char desc[WORKER_DESC_LEN] = { };
    4601                 :          1 :         struct pool_workqueue *pwq = NULL;
    4602                 :          1 :         struct workqueue_struct *wq = NULL;
    4603                 :            :         struct worker *worker;
    4604                 :            : 
    4605                 :          1 :         if (!(task->flags & PF_WQ_WORKER))
    4606                 :          1 :                 return;
    4607                 :            : 
    4608                 :            :         /*
    4609                 :            :          * This function is called without any synchronization and @task
    4610                 :            :          * could be in any state.  Be careful with dereferences.
    4611                 :            :          */
    4612                 :          1 :         worker = kthread_probe_data(task);
    4613                 :            : 
    4614                 :            :         /*
    4615                 :            :          * Carefully copy the associated workqueue's workfn, name and desc.
    4616                 :            :          * Keep the original last '\0' in case the original is garbage.
    4617                 :            :          */
    4618                 :          1 :         probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
    4619                 :          1 :         probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
    4620                 :          1 :         probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
    4621                 :          1 :         probe_kernel_read(name, wq->name, sizeof(name) - 1);
    4622                 :          1 :         probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
    4623                 :            : 
    4624                 :          1 :         if (fn || name[0] || desc[0]) {
    4625                 :          1 :                 printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
    4626                 :          1 :                 if (strcmp(name, desc))
    4627                 :          0 :                         pr_cont(" (%s)", desc);
    4628                 :          1 :                 pr_cont("\n");
    4629                 :            :         }
    4630                 :            : }
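/*
 * Illustrative sketch (not part of workqueue.c): a debug helper that dumps
 * a task and, if it is a workqueue worker, the workqueue/description line
 * produced by print_worker_info().  "dump_one_task" is hypothetical; the
 * scheduler's task dump path uses print_worker_info() in a similar way.
 */
static void dump_one_task(struct task_struct *task)
{
	pr_info("task:%-15s pid:%d\n", task->comm, task_pid_nr(task));
	print_worker_info(KERN_INFO, task);
}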
    4631                 :            : 
    4632                 :          0 : static void pr_cont_pool_info(struct worker_pool *pool)
    4633                 :            : {
    4634                 :          0 :         pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
    4635                 :          0 :         if (pool->node != NUMA_NO_NODE)
    4636                 :          0 :                 pr_cont(" node=%d", pool->node);
    4637                 :          0 :         pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
    4638                 :          0 : }
    4639                 :            : 
    4640                 :          0 : static void pr_cont_work(bool comma, struct work_struct *work)
    4641                 :            : {
    4642                 :          0 :         if (work->func == wq_barrier_func) {
    4643                 :            :                 struct wq_barrier *barr;
    4644                 :            : 
    4645                 :            :                 barr = container_of(work, struct wq_barrier, work);
    4646                 :            : 
    4647                 :          0 :                 pr_cont("%s BAR(%d)", comma ? "," : "",
    4648                 :            :                         task_pid_nr(barr->task));
    4649                 :            :         } else {
    4650                 :          0 :                 pr_cont("%s %ps", comma ? "," : "", work->func);
    4651                 :            :         }
    4652                 :          0 : }
    4653                 :            : 
    4654                 :          0 : static void show_pwq(struct pool_workqueue *pwq)
    4655                 :            : {
    4656                 :          0 :         struct worker_pool *pool = pwq->pool;
    4657                 :            :         struct work_struct *work;
    4658                 :            :         struct worker *worker;
    4659                 :            :         bool has_in_flight = false, has_pending = false;
    4660                 :            :         int bkt;
    4661                 :            : 
    4662                 :          0 :         pr_info("  pwq %d:", pool->id);
    4663                 :          0 :         pr_cont_pool_info(pool);
    4664                 :            : 
    4665                 :          0 :         pr_cont(" active=%d/%d refcnt=%d%s\n",
    4666                 :            :                 pwq->nr_active, pwq->max_active, pwq->refcnt,
    4667                 :            :                 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
    4668                 :            : 
    4669                 :          0 :         hash_for_each(pool->busy_hash, bkt, worker, hentry) {
    4670                 :          0 :                 if (worker->current_pwq == pwq) {
    4671                 :            :                         has_in_flight = true;
    4672                 :            :                         break;
    4673                 :            :                 }
    4674                 :            :         }
    4675                 :          0 :         if (has_in_flight) {
    4676                 :            :                 bool comma = false;
    4677                 :            : 
    4678                 :          0 :                 pr_info("    in-flight:");
    4679                 :          0 :                 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
    4680                 :          0 :                         if (worker->current_pwq != pwq)
    4681                 :          0 :                                 continue;
    4682                 :            : 
    4683                 :          0 :                         pr_cont("%s %d%s:%ps", comma ? "," : "",
    4684                 :            :                                 task_pid_nr(worker->task),
    4685                 :            :                                 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
    4686                 :            :                                 worker->current_func);
    4687                 :          0 :                         list_for_each_entry(work, &worker->scheduled, entry)
    4688                 :          0 :                                 pr_cont_work(false, work);
    4689                 :            :                         comma = true;
    4690                 :            :                 }
    4691                 :          0 :                 pr_cont("\n");
    4692                 :            :         }
    4693                 :            : 
    4694                 :          0 :         list_for_each_entry(work, &pool->worklist, entry) {
    4695                 :          0 :                 if (get_work_pwq(work) == pwq) {
    4696                 :            :                         has_pending = true;
    4697                 :            :                         break;
    4698                 :            :                 }
    4699                 :            :         }
    4700                 :          0 :         if (has_pending) {
    4701                 :            :                 bool comma = false;
    4702                 :            : 
    4703                 :          0 :                 pr_info("    pending:");
    4704                 :          0 :                 list_for_each_entry(work, &pool->worklist, entry) {
    4705                 :          0 :                         if (get_work_pwq(work) != pwq)
    4706                 :          0 :                                 continue;
    4707                 :            : 
    4708                 :          0 :                         pr_cont_work(comma, work);
    4709                 :          0 :                         comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
    4710                 :            :                 }
    4711                 :          0 :                 pr_cont("\n");
    4712                 :            :         }
    4713                 :            : 
    4714                 :          0 :         if (!list_empty(&pwq->delayed_works)) {
    4715                 :            :                 bool comma = false;
    4716                 :            : 
    4717                 :          0 :                 pr_info("    delayed:");
    4718                 :          0 :                 list_for_each_entry(work, &pwq->delayed_works, entry) {
    4719                 :          0 :                         pr_cont_work(comma, work);
    4720                 :          0 :                         comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
    4721                 :            :                 }
    4722                 :          0 :                 pr_cont("\n");
    4723                 :            :         }
    4724                 :          0 : }
    4725                 :            : 
    4726                 :            : /**
    4727                 :            :  * show_workqueue_state - dump workqueue state
    4728                 :            :  *
    4729                 :            :  * Called from a sysrq handler or try_to_freeze_tasks() and prints out
    4730                 :            :  * all busy workqueues and pools.
    4731                 :            :  */
    4732                 :          0 : void show_workqueue_state(void)
    4733                 :            : {
    4734                 :            :         struct workqueue_struct *wq;
    4735                 :            :         struct worker_pool *pool;
    4736                 :            :         unsigned long flags;
    4737                 :            :         int pi;
    4738                 :            : 
    4739                 :            :         rcu_read_lock();
    4740                 :            : 
    4741                 :          0 :         pr_info("Showing busy workqueues and worker pools:\n");
    4742                 :            : 
    4743                 :          0 :         list_for_each_entry_rcu(wq, &workqueues, list) {
    4744                 :            :                 struct pool_workqueue *pwq;
    4745                 :            :                 bool idle = true;
    4746                 :            : 
    4747                 :          0 :                 for_each_pwq(pwq, wq) {
    4748                 :          0 :                         if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
    4749                 :            :                                 idle = false;
    4750                 :            :                                 break;
    4751                 :            :                         }
    4752                 :            :                 }
    4753                 :          0 :                 if (idle)
    4754                 :          0 :                         continue;
    4755                 :            : 
    4756                 :          0 :                 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
    4757                 :            : 
    4758                 :          0 :                 for_each_pwq(pwq, wq) {
    4759                 :          0 :                         spin_lock_irqsave(&pwq->pool->lock, flags);
    4760                 :          0 :                         if (pwq->nr_active || !list_empty(&pwq->delayed_works))
    4761                 :          0 :                                 show_pwq(pwq);
    4762                 :          0 :                         spin_unlock_irqrestore(&pwq->pool->lock, flags);
    4763                 :            :                         /*
    4764                 :            :                          * We could be printing a lot from atomic context, e.g.
    4765                 :            :                          * sysrq-t -> show_workqueue_state(). Avoid triggering
    4766                 :            :                          * hard lockup.
    4767                 :            :                          */
    4768                 :            :                         touch_nmi_watchdog();
    4769                 :            :                 }
    4770                 :            :         }
    4771                 :            : 
    4772                 :          0 :         for_each_pool(pool, pi) {
    4773                 :            :                 struct worker *worker;
    4774                 :            :                 bool first = true;
    4775                 :            : 
    4776                 :          0 :                 spin_lock_irqsave(&pool->lock, flags);
    4777                 :          0 :                 if (pool->nr_workers == pool->nr_idle)
    4778                 :            :                         goto next_pool;
    4779                 :            : 
    4780                 :          0 :                 pr_info("pool %d:", pool->id);
    4781                 :          0 :                 pr_cont_pool_info(pool);
    4782                 :          0 :                 pr_cont(" hung=%us workers=%d",
    4783                 :            :                         jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
    4784                 :            :                         pool->nr_workers);
    4785                 :          0 :                 if (pool->manager)
    4786                 :          0 :                         pr_cont(" manager: %d",
    4787                 :            :                                 task_pid_nr(pool->manager->task));
    4788                 :          0 :                 list_for_each_entry(worker, &pool->idle_list, entry) {
    4789                 :          0 :                         pr_cont(" %s%d", first ? "idle: " : "",
    4790                 :            :                                 task_pid_nr(worker->task));
    4791                 :            :                         first = false;
    4792                 :            :                 }
    4793                 :          0 :                 pr_cont("\n");
    4794                 :            :         next_pool:
    4795                 :            :                 spin_unlock_irqrestore(&pool->lock, flags);
    4796                 :            :                 /*
    4797                 :            :                  * We could be printing a lot from atomic context, e.g.
    4798                 :            :                  * sysrq-t -> show_workqueue_state(). Avoid triggering
    4799                 :            :                  * hard lockup.
    4800                 :            :                  */
    4801                 :            :                 touch_nmi_watchdog();
    4802                 :            :         }
    4803                 :            : 
    4804                 :            :         rcu_read_unlock();
    4805                 :          0 : }
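/*
 * Illustrative sketch (not part of workqueue.c): dumping workqueue state
 * from a driver's own timeout handler when a queued work item appears
 * stuck.  "my_drv_timeout" is hypothetical; show_workqueue_state() is the
 * same entry point used by the sysrq handler mentioned above.
 */
static void my_drv_timeout(void)
{
	pr_warn("my_drv: queued work did not complete in time\n");
	show_workqueue_state();
}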
    4806                 :            : 
    4807                 :            : /* used to show worker information through /proc/PID/{comm,stat,status} */
    4808                 :          3 : void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
    4809                 :            : {
    4810                 :            :         int off;
    4811                 :            : 
    4812                 :            :         /* always show the actual comm */
    4813                 :          3 :         off = strscpy(buf, task->comm, size);
    4814                 :          3 :         if (off < 0)
    4815                 :          3 :                 return;
    4816                 :            : 
    4817                 :            :         /* stabilize PF_WQ_WORKER and worker pool association */
    4818                 :          3 :         mutex_lock(&wq_pool_attach_mutex);
    4819                 :            : 
    4820                 :          3 :         if (task->flags & PF_WQ_WORKER) {
    4821                 :          3 :                 struct worker *worker = kthread_data(task);
    4822                 :          3 :                 struct worker_pool *pool = worker->pool;
    4823                 :            : 
    4824                 :          3 :                 if (pool) {
    4825                 :            :                         spin_lock_irq(&pool->lock);
    4826                 :            :                         /*
    4827                 :            :                          * ->desc tracks information (wq name or
    4828                 :            :                          * set_worker_desc()) for the latest execution.  If
    4829                 :            :                          * current, prepend '+', otherwise '-'.
    4830                 :            :                          */
    4831                 :          3 :                         if (worker->desc[0] != '\0') {
    4832                 :          3 :                                 if (worker->current_work)
    4833                 :          3 :                                         scnprintf(buf + off, size - off, "+%s",
    4834                 :          3 :                                                   worker->desc);
    4835                 :            :                                 else
    4836                 :          3 :                                         scnprintf(buf + off, size - off, "-%s",
    4837                 :          3 :                                                   worker->desc);
    4838                 :            :                         }
    4839                 :            :                         spin_unlock_irq(&pool->lock);
    4840                 :            :                 }
    4841                 :            :         }
    4842                 :            : 
    4843                 :          3 :         mutex_unlock(&wq_pool_attach_mutex);
    4844                 :            : }
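/*
 * Illustrative sketch (not part of workqueue.c): a userspace reader of the
 * comm string built above.  For a worker, /proc/PID/comm looks like
 * "kworker/u8:3-events_unbound" (last executed) or
 * "kworker/2:1+my_dev-eth0" (currently executing).  Error handling is
 * minimal and the PID is supplied by the caller.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void print_worker_comm(pid_t pid)
{
	char path[64], comm[64];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/comm", (int)pid);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return;
	n = read(fd, comm, sizeof(comm) - 1);
	if (n > 0) {
		comm[n] = '\0';
		fputs(comm, stdout);	/* comm already ends with '\n' */
	}
	close(fd);
}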
    4845                 :            : 
    4846                 :            : #ifdef CONFIG_SMP
    4847                 :            : 
    4848                 :            : /*
    4849                 :            :  * CPU hotplug.
    4850                 :            :  *
    4851                 :            :  * There are two challenges in supporting CPU hotplug.  Firstly, there
    4852                 :            :  * are a lot of assumptions on strong associations among work, pwq and
    4853                 :            :  * pool which make migrating pending and scheduled works very
    4854                 :            :  * difficult to implement without impacting hot paths.  Secondly,
    4855                 :            :  * worker pools serve a mix of short, long and very long running work items,
    4856                 :            :  * making blocked draining impractical.
    4857                 :            :  *
    4858                 :            :  * This is solved by allowing the pools to be disassociated from the CPU
    4859                 :            :  * and run as unbound ones, and by allowing them to be reattached later if
    4860                 :            :  * the CPU comes back online.
    4861                 :            :  */
    4862                 :            : 
    4863                 :          0 : static void unbind_workers(int cpu)
    4864                 :            : {
    4865                 :            :         struct worker_pool *pool;
    4866                 :            :         struct worker *worker;
    4867                 :            : 
    4868                 :          0 :         for_each_cpu_worker_pool(pool, cpu) {
    4869                 :          0 :                 mutex_lock(&wq_pool_attach_mutex);
    4870                 :            :                 spin_lock_irq(&pool->lock);
    4871                 :            : 
    4872                 :            :                 /*
    4873                 :            :                  * We've blocked all attach/detach operations. Make all workers
    4874                 :            :                  * unbound and set DISASSOCIATED.  Before this, all workers
    4875                 :            :  * except for the ones which are still executing work items from
    4876                 :            :  * before the last CPU down must be on the CPU.  After
    4877                 :            :  * this, they may run on any CPU.
    4878                 :            :                  */
    4879                 :          0 :                 for_each_pool_worker(worker, pool)
    4880                 :          0 :                         worker->flags |= WORKER_UNBOUND;
    4881                 :            : 
    4882                 :          0 :                 pool->flags |= POOL_DISASSOCIATED;
    4883                 :            : 
    4884                 :            :                 spin_unlock_irq(&pool->lock);
    4885                 :          0 :                 mutex_unlock(&wq_pool_attach_mutex);
    4886                 :            : 
    4887                 :            :                 /*
    4888                 :            :                  * Call schedule() so that we cross rq->lock and thus can
    4889                 :            :                  * guarantee sched callbacks see the %WORKER_UNBOUND flag.
    4890                 :            :                  * This is necessary as scheduler callbacks may be invoked
    4891                 :            :                  * from other cpus.
    4892                 :            :                  */
    4893                 :          0 :                 schedule();
    4894                 :            : 
    4895                 :            :                 /*
    4896                 :            :                  * Sched callbacks are disabled now.  Zap nr_running.
    4897                 :            :                  * After this, nr_running stays zero and need_more_worker()
    4898                 :            :                  * and keep_working() are always true as long as the
    4899                 :            :                  * worklist is not empty.  This pool now behaves as an
    4900                 :            :                  * unbound (in terms of concurrency management) pool which
    4901                 :            :  * is served by workers tied to the pool.
    4902                 :            :                  */
    4903                 :            :                 atomic_set(&pool->nr_running, 0);
    4904                 :            : 
    4905                 :            :                 /*
    4906                 :            :                  * With concurrency management just turned off, a busy
    4907                 :            :                  * worker blocking could lead to lengthy stalls.  Kick off
    4908                 :            :                  * unbound chain execution of currently pending work items.
    4909                 :            :                  */
    4910                 :            :                 spin_lock_irq(&pool->lock);
    4911                 :          0 :                 wake_up_worker(pool);
    4912                 :            :                 spin_unlock_irq(&pool->lock);
    4913                 :            :         }
    4914                 :          0 : }
    4915                 :            : 
    4916                 :            : /**
    4917                 :            :  * rebind_workers - rebind all workers of a pool to the associated CPU
    4918                 :            :  * @pool: pool of interest
    4919                 :            :  *
    4920                 :            :  * @pool->cpu is coming online.  Rebind all workers to the CPU.
    4921                 :            :  */
    4922                 :          3 : static void rebind_workers(struct worker_pool *pool)
    4923                 :            : {
    4924                 :            :         struct worker *worker;
    4925                 :            : 
    4926                 :            :         lockdep_assert_held(&wq_pool_attach_mutex);
    4927                 :            : 
    4928                 :            :         /*
    4929                 :            :          * Restore CPU affinity of all workers.  As all idle workers should
    4930                 :            :          * be on the run-queue of the associated CPU before any local
    4931                 :            :          * wake-ups for concurrency management happen, restore CPU affinity
    4932                 :            :          * of all workers first and then clear UNBOUND.  As we're called
    4933                 :            :          * from CPU_ONLINE, the following shouldn't fail.
    4934                 :            :          */
    4935                 :          3 :         for_each_pool_worker(worker, pool)
    4936                 :          3 :                 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
    4937                 :            :                                                   pool->attrs->cpumask) < 0);
    4938                 :            : 
    4939                 :            :         spin_lock_irq(&pool->lock);
    4940                 :            : 
    4941                 :          3 :         pool->flags &= ~POOL_DISASSOCIATED;
    4942                 :            : 
    4943                 :          3 :         for_each_pool_worker(worker, pool) {
    4944                 :          3 :                 unsigned int worker_flags = worker->flags;
    4945                 :            : 
    4946                 :            :                 /*
    4947                 :            :                  * A bound idle worker should actually be on the runqueue
    4948                 :            :                  * of the associated CPU for local wake-ups targeting it to
    4949                 :            :                  * work.  Kick all idle workers so that they migrate to the
    4950                 :            :                  * associated CPU.  Doing this in the same loop as
    4951                 :            :                  * replacing UNBOUND with REBOUND is safe as no worker will
    4952                 :            :                  * be bound before @pool->lock is released.
    4953                 :            :                  */
    4954                 :          3 :                 if (worker_flags & WORKER_IDLE)
    4955                 :          3 :                         wake_up_process(worker->task);
    4956                 :            : 
    4957                 :            :                 /*
    4958                 :            :                  * We want to clear UNBOUND but can't directly call
    4959                 :            :                  * worker_clr_flags() or adjust nr_running.  Atomically
    4960                 :            :                  * replace UNBOUND with another NOT_RUNNING flag REBOUND.
    4961                 :            :                  * @worker will clear REBOUND using worker_clr_flags() when
    4962                 :            :                  * it initiates the next execution cycle thus restoring
    4963                 :            :                  * concurrency management.  Note that when or whether
    4964                 :            :                  * @worker clears REBOUND doesn't affect correctness.
    4965                 :            :                  *
    4966                 :            :                  * WRITE_ONCE() is necessary because @worker->flags may be
    4967                 :            :                  * tested without holding any lock in
    4968                 :            :                  * wq_worker_running().  Without it, NOT_RUNNING test may
    4969                 :            :                  * fail incorrectly leading to premature concurrency
    4970                 :            :                  * management operations.
    4971                 :            :                  */
    4972                 :          3 :                 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
    4973                 :            :                 worker_flags |= WORKER_REBOUND;
    4974                 :          3 :                 worker_flags &= ~WORKER_UNBOUND;
    4975                 :            :                 WRITE_ONCE(worker->flags, worker_flags);
    4976                 :            :         }
    4977                 :            : 
    4978                 :            :         spin_unlock_irq(&pool->lock);
    4979                 :          3 : }
    4980                 :            : 
    4981                 :            : /**
    4982                 :            :  * restore_unbound_workers_cpumask - restore cpumask of unbound workers
    4983                 :            :  * @pool: unbound pool of interest
    4984                 :            :  * @cpu: the CPU which is coming up
    4985                 :            :  *
    4986                 :            :  * An unbound pool may end up with a cpumask which doesn't have any online
    4987                 :            :  * CPUs.  When a worker of such a pool gets scheduled, the scheduler resets
    4988                 :            :  * its cpus_allowed.  If @cpu is in @pool's cpumask which didn't have any
    4989                 :            :  * online CPU before, cpus_allowed of all its workers should be restored.
    4990                 :            :  */
    4991                 :          3 : static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
    4992                 :            : {
    4993                 :            :         static cpumask_t cpumask;
    4994                 :            :         struct worker *worker;
    4995                 :            : 
    4996                 :            :         lockdep_assert_held(&wq_pool_attach_mutex);
    4997                 :            : 
    4998                 :            :         /* is @cpu allowed for @pool? */
    4999                 :          3 :         if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
    5000                 :          3 :                 return;
    5001                 :            : 
    5002                 :            :         cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
    5003                 :            : 
    5004                 :            :         /* as we're called from CPU_ONLINE, the following shouldn't fail */
    5005                 :          3 :         for_each_pool_worker(worker, pool)
    5006                 :          3 :                 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
    5007                 :            : }
    5008                 :            : 
    5009                 :          3 : int workqueue_prepare_cpu(unsigned int cpu)
    5010                 :            : {
    5011                 :            :         struct worker_pool *pool;
    5012                 :            : 
    5013                 :          3 :         for_each_cpu_worker_pool(pool, cpu) {
    5014                 :          3 :                 if (pool->nr_workers)
    5015                 :          0 :                         continue;
    5016                 :          3 :                 if (!create_worker(pool))
    5017                 :            :                         return -ENOMEM;
    5018                 :            :         }
    5019                 :            :         return 0;
    5020                 :            : }
    5021                 :            : 
    5022                 :          3 : int workqueue_online_cpu(unsigned int cpu)
    5023                 :            : {
    5024                 :            :         struct worker_pool *pool;
    5025                 :            :         struct workqueue_struct *wq;
    5026                 :            :         int pi;
    5027                 :            : 
    5028                 :          3 :         mutex_lock(&wq_pool_mutex);
    5029                 :            : 
    5030                 :          3 :         for_each_pool(pool, pi) {
    5031                 :          3 :                 mutex_lock(&wq_pool_attach_mutex);
    5032                 :            : 
    5033                 :          3 :                 if (pool->cpu == cpu)
    5034                 :          3 :                         rebind_workers(pool);
    5035                 :          3 :                 else if (pool->cpu < 0)
    5036                 :          3 :                         restore_unbound_workers_cpumask(pool, cpu);
    5037                 :            : 
    5038                 :          3 :                 mutex_unlock(&wq_pool_attach_mutex);
    5039                 :            :         }
    5040                 :            : 
    5041                 :            :         /* update NUMA affinity of unbound workqueues */
    5042                 :          3 :         list_for_each_entry(wq, &workqueues, list)
    5043                 :          3 :                 wq_update_unbound_numa(wq, cpu, true);
    5044                 :            : 
    5045                 :          3 :         mutex_unlock(&wq_pool_mutex);
    5046                 :          3 :         return 0;
    5047                 :            : }
    5048                 :            : 
    5049                 :          0 : int workqueue_offline_cpu(unsigned int cpu)
    5050                 :            : {
    5051                 :            :         struct workqueue_struct *wq;
    5052                 :            : 
    5053                 :            :         /* unbinding per-cpu workers should happen on the local CPU */
    5054                 :          0 :         if (WARN_ON(cpu != smp_processor_id()))
    5055                 :            :                 return -1;
    5056                 :            : 
    5057                 :          0 :         unbind_workers(cpu);
    5058                 :            : 
    5059                 :            :         /* update NUMA affinity of unbound workqueues */
    5060                 :          0 :         mutex_lock(&wq_pool_mutex);
    5061                 :          0 :         list_for_each_entry(wq, &workqueues, list)
    5062                 :          0 :                 wq_update_unbound_numa(wq, cpu, false);
    5063                 :          0 :         mutex_unlock(&wq_pool_mutex);
    5064                 :            : 
    5065                 :          0 :         return 0;
    5066                 :            : }
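/*
 * Illustrative sketch (not part of workqueue.c): how online/offline
 * callbacks like the ones above are typically wired into the CPU hotplug
 * state machine.  The workqueue callbacks themselves are registered through
 * the kernel's static hotplug state table, so the "my_subsys_*" names and
 * the dynamic state below are only an analogous, hypothetical example.
 */
static int my_subsys_online_cpu(unsigned int cpu)
{
	/* bring per-CPU state for @cpu up */
	return 0;
}

static int my_subsys_offline_cpu(unsigned int cpu)
{
	/* tear per-CPU state for @cpu down */
	return 0;
}

static int __init my_subsys_hotplug_init(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mysubsys:online",
				my_subsys_online_cpu, my_subsys_offline_cpu);
	return ret < 0 ? ret : 0;	/* dynamic states return the state id */
}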
    5067                 :            : 
    5068                 :            : struct work_for_cpu {
    5069                 :            :         struct work_struct work;
    5070                 :            :         long (*fn)(void *);
    5071                 :            :         void *arg;
    5072                 :            :         long ret;
    5073                 :            : };
    5074                 :            : 
    5075                 :          0 : static void work_for_cpu_fn(struct work_struct *work)
    5076                 :            : {
    5077                 :            :         struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
    5078                 :            : 
    5079                 :          0 :         wfc->ret = wfc->fn(wfc->arg);
    5080                 :          0 : }
    5081                 :            : 
    5082                 :            : /**
    5083                 :            :  * work_on_cpu - run a function in thread context on a particular cpu
    5084                 :            :  * @cpu: the cpu to run on
    5085                 :            :  * @fn: the function to run
    5086                 :            :  * @arg: the function arg
    5087                 :            :  *
    5088                 :            :  * It is up to the caller to ensure that the cpu doesn't go offline.
    5089                 :            :  * The caller must not hold any locks which would prevent @fn from completing.
    5090                 :            :  *
    5091                 :            :  * Return: The value @fn returns.
    5092                 :            :  */
    5093                 :          0 : long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
    5094                 :            : {
    5095                 :          0 :         struct work_for_cpu wfc = { .fn = fn, .arg = arg };
    5096                 :            : 
    5097                 :          0 :         INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
    5098                 :            :         schedule_work_on(cpu, &wfc.work);
    5099                 :            :         flush_work(&wfc.work);
    5100                 :            :         destroy_work_on_stack(&wfc.work);
    5101                 :          0 :         return wfc.ret;
    5102                 :            : }
    5103                 :            : EXPORT_SYMBOL_GPL(work_on_cpu);
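/*
 * Illustrative sketch (not part of workqueue.c): running a device setup
 * step on a specific CPU in process context.  "struct my_dev" and
 * "my_dev_local_init()" are hypothetical; work_on_cpu() blocks until the
 * function has run on @cpu and returns its result.
 */
static long my_dev_init_fn(void *arg)
{
	struct my_dev *dev = arg;

	/* executes on the CPU passed to work_on_cpu() */
	return my_dev_local_init(dev);
}

static long my_dev_init_on(int cpu, struct my_dev *dev)
{
	/* the caller must keep @cpu online, e.g. via get_online_cpus() */
	return work_on_cpu(cpu, my_dev_init_fn, dev);
}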
    5104                 :            : 
    5105                 :            : /**
    5106                 :            :  * work_on_cpu_safe - run a function in thread context on a particular cpu
    5107                 :            :  * @cpu: the cpu to run on
    5108                 :            :  * @fn:  the function to run
    5109                 :            :  * @arg: the function argument
    5110                 :            :  *
    5111                 :            :  * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
    5112                 :            :  * any locks which would prevent @fn from completing.
    5113                 :            :  *
    5114                 :            :  * Return: The value @fn returns.
    5115                 :            :  */
    5116                 :          0 : long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
    5117                 :            : {
    5118                 :            :         long ret = -ENODEV;
    5119                 :            : 
    5120                 :            :         get_online_cpus();
    5121                 :          0 :         if (cpu_online(cpu))
    5122                 :          0 :                 ret = work_on_cpu(cpu, fn, arg);
    5123                 :            :         put_online_cpus();
    5124                 :          0 :         return ret;
    5125                 :            : }
    5126                 :            : EXPORT_SYMBOL_GPL(work_on_cpu_safe);
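/*
 * Illustrative sketch (not part of workqueue.c): the hotplug-safe variant
 * of the call above, reusing the hypothetical my_dev_init_fn() from the
 * previous sketch.  work_on_cpu_safe() pins CPU hotplug itself, so the
 * caller only has to handle -ENODEV for an offline @cpu.
 */
static long my_dev_init_on_safe(int cpu, struct my_dev *dev)
{
	long ret = work_on_cpu_safe(cpu, my_dev_init_fn, dev);

	if (ret == -ENODEV)
		pr_warn("my_dev: CPU %d is offline\n", cpu);
	return ret;
}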
    5127                 :            : #endif /* CONFIG_SMP */
    5128                 :            : 
    5129                 :            : #ifdef CONFIG_FREEZER
    5130                 :            : 
    5131                 :            : /**
    5132                 :            :  * freeze_workqueues_begin - begin freezing workqueues
    5133                 :            :  *
    5134                 :            :  * Start freezing workqueues.  After this function returns, all freezable
    5135                 :            :  * workqueues will queue new work items to their delayed_works list instead of
    5136                 :            :  * pool->worklist.
    5137                 :            :  *
    5138                 :            :  * CONTEXT:
    5139                 :            :  * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
    5140                 :            :  */
    5141                 :          0 : void freeze_workqueues_begin(void)
    5142                 :            : {
    5143                 :            :         struct workqueue_struct *wq;
    5144                 :            :         struct pool_workqueue *pwq;
    5145                 :            : 
    5146                 :          0 :         mutex_lock(&wq_pool_mutex);
    5147                 :            : 
    5148                 :          0 :         WARN_ON_ONCE(workqueue_freezing);
    5149                 :          0 :         workqueue_freezing = true;
    5150                 :            : 
    5151                 :          0 :         list_for_each_entry(wq, &workqueues, list) {
    5152                 :          0 :                 mutex_lock(&wq->mutex);
    5153                 :          0 :                 for_each_pwq(pwq, wq)
    5154                 :          0 :                         pwq_adjust_max_active(pwq);
    5155                 :          0 :                 mutex_unlock(&wq->mutex);
    5156                 :            :         }
    5157                 :            : 
    5158                 :          0 :         mutex_unlock(&wq_pool_mutex);
    5159                 :          0 : }
    5160                 :            : 
    5161                 :            : /**
    5162                 :            :  * freeze_workqueues_busy - are freezable workqueues still busy?
    5163                 :            :  *
    5164                 :            :  * Check whether freezing is complete.  This function must be called
    5165                 :            :  * between freeze_workqueues_begin() and thaw_workqueues().
    5166                 :            :  *
    5167                 :            :  * CONTEXT:
    5168                 :            :  * Grabs and releases wq_pool_mutex.
    5169                 :            :  *
    5170                 :            :  * Return:
    5171                 :            :  * %true if some freezable workqueues are still busy.  %false if freezing
    5172                 :            :  * is complete.
    5173                 :            :  */
    5174                 :          0 : bool freeze_workqueues_busy(void)
    5175                 :            : {
    5176                 :            :         bool busy = false;
    5177                 :            :         struct workqueue_struct *wq;
    5178                 :            :         struct pool_workqueue *pwq;
    5179                 :            : 
    5180                 :          0 :         mutex_lock(&wq_pool_mutex);
    5181                 :            : 
    5182                 :          0 :         WARN_ON_ONCE(!workqueue_freezing);
    5183                 :            : 
    5184                 :          0 :         list_for_each_entry(wq, &workqueues, list) {
    5185                 :          0 :                 if (!(wq->flags & WQ_FREEZABLE))
    5186                 :          0 :                         continue;
    5187                 :            :                 /*
    5188                 :            :                  * nr_active is monotonically decreasing.  It's safe
    5189                 :            :                  * to peek without lock.
    5190                 :            :                  */
    5191                 :            :                 rcu_read_lock();
    5192                 :          0 :                 for_each_pwq(pwq, wq) {
    5193                 :          0 :                         WARN_ON_ONCE(pwq->nr_active < 0);
    5194                 :          0 :                         if (pwq->nr_active) {
    5195                 :            :                                 busy = true;
    5196                 :            :                                 rcu_read_unlock();
    5197                 :            :                                 goto out_unlock;
    5198                 :            :                         }
    5199                 :            :                 }
    5200                 :            :                 rcu_read_unlock();
    5201                 :            :         }
    5202                 :            : out_unlock:
    5203                 :          0 :         mutex_unlock(&wq_pool_mutex);
    5204                 :          0 :         return busy;
    5205                 :            : }
    5206                 :            : 
    5207                 :            : /**
    5208                 :            :  * thaw_workqueues - thaw workqueues
    5209                 :            :  *
    5210                 :            :  * Thaw workqueues.  Normal queueing is restored and all collected
    5211                 :            :  * frozen works are transferred to their respective pool worklists.
    5212                 :            :  *
    5213                 :            :  * CONTEXT:
    5214                 :            :  * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
    5215                 :            :  */
    5216                 :          0 : void thaw_workqueues(void)
    5217                 :            : {
    5218                 :            :         struct workqueue_struct *wq;
    5219                 :            :         struct pool_workqueue *pwq;
    5220                 :            : 
    5221                 :          0 :         mutex_lock(&wq_pool_mutex);
    5222                 :            : 
    5223                 :          0 :         if (!workqueue_freezing)
    5224                 :            :                 goto out_unlock;
    5225                 :            : 
    5226                 :          0 :         workqueue_freezing = false;
    5227                 :            : 
    5228                 :            :         /* restore max_active and repopulate worklist */
    5229                 :          0 :         list_for_each_entry(wq, &workqueues, list) {
    5230                 :          0 :                 mutex_lock(&wq->mutex);
    5231                 :          0 :                 for_each_pwq(pwq, wq)
    5232                 :          0 :                         pwq_adjust_max_active(pwq);
    5233                 :          0 :                 mutex_unlock(&wq->mutex);
    5234                 :            :         }
    5235                 :            : 
    5236                 :            : out_unlock:
    5237                 :          0 :         mutex_unlock(&wq_pool_mutex);
    5238                 :          0 : }
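/*
 * Illustrative sketch (not part of workqueue.c): the order in which a
 * suspend-style caller typically drives this API - begin freezing, poll
 * until no freezable workqueue has in-flight work, and thaw again on
 * failure (and later on resume).  The retry count and msleep() interval
 * are simplified placeholders; assumes <linux/delay.h>.
 */
static int my_freeze_workqueues(void)
{
	int retries = 100;

	freeze_workqueues_begin();

	while (freeze_workqueues_busy()) {
		if (!--retries) {
			thaw_workqueues();
			return -EBUSY;
		}
		msleep(10);
	}
	return 0;	/* thaw_workqueues() runs later, on resume */
}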
    5239                 :            : #endif /* CONFIG_FREEZER */
    5240                 :            : 
    5241                 :          0 : static int workqueue_apply_unbound_cpumask(void)
    5242                 :            : {
    5243                 :          0 :         LIST_HEAD(ctxs);
    5244                 :            :         int ret = 0;
    5245                 :            :         struct workqueue_struct *wq;
    5246                 :            :         struct apply_wqattrs_ctx *ctx, *n;
    5247                 :            : 
    5248                 :            :         lockdep_assert_held(&wq_pool_mutex);
    5249                 :            : 
    5250                 :          0 :         list_for_each_entry(wq, &workqueues, list) {
    5251                 :          0 :                 if (!(wq->flags & WQ_UNBOUND))
    5252                 :          0 :                         continue;
    5253                 :            :                 /* creating multiple pwqs breaks ordering guarantee */
    5254                 :          0 :                 if (wq->flags & __WQ_ORDERED)
    5255                 :          0 :                         continue;
    5256                 :            : 
    5257                 :          0 :                 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
    5258                 :          0 :                 if (!ctx) {
    5259                 :            :                         ret = -ENOMEM;
    5260                 :            :                         break;
    5261                 :            :                 }
    5262                 :            : 
    5263                 :          0 :                 list_add_tail(&ctx->list, &ctxs);
    5264                 :            :         }
    5265                 :            : 
    5266                 :          0 :         list_for_each_entry_safe(ctx, n, &ctxs, list) {
    5267                 :          0 :                 if (!ret)
    5268                 :          0 :                         apply_wqattrs_commit(ctx);
    5269                 :          0 :                 apply_wqattrs_cleanup(ctx);
    5270                 :            :         }
    5271                 :            : 
    5272                 :          0 :         return ret;
    5273                 :            : }
    5274                 :            : 
    5275                 :            : /**
    5276                 :            :  *  workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
    5277                 :            :  *  @cpumask: the cpumask to set
    5278                 :            :  *
    5279                 :            :  *  The low-level workqueues cpumask is a global cpumask that limits
    5280                 :            :  *  the affinity of all unbound workqueues.  This function checks @cpumask
    5281                 :            :  *  and applies it to all unbound workqueues, updating all of their pwqs.
    5282                 :            :  *
    5283                 :            :  *  Return:     0       - Success
    5284                 :            :  *              -EINVAL - Invalid @cpumask
    5285                 :            :  *              -ENOMEM - Failed to allocate memory for attrs or pwqs.
    5286                 :            :  */
    5287                 :          0 : int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
    5288                 :            : {
    5289                 :            :         int ret = -EINVAL;
    5290                 :            :         cpumask_var_t saved_cpumask;
    5291                 :            : 
    5292                 :            :         if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
    5293                 :            :                 return -ENOMEM;
    5294                 :            : 
    5295                 :            :         /*
    5296                 :            :          * Not excluding isolated cpus on purpose.
    5297                 :            :          * If the user wishes to include them, we allow that.
    5298                 :            :          */
    5299                 :            :         cpumask_and(cpumask, cpumask, cpu_possible_mask);
    5300                 :          0 :         if (!cpumask_empty(cpumask)) {
    5301                 :            :                 apply_wqattrs_lock();
    5302                 :            : 
    5303                 :            :                 /* save the old wq_unbound_cpumask. */
    5304                 :            :                 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
    5305                 :            : 
    5306                 :            :                 /* update wq_unbound_cpumask at first and apply it to wqs. */
    5307                 :            :                 cpumask_copy(wq_unbound_cpumask, cpumask);
    5308                 :          0 :                 ret = workqueue_apply_unbound_cpumask();
    5309                 :            : 
    5310                 :            :                 /* restore the wq_unbound_cpumask when failed. */
    5311                 :          0 :                 if (ret < 0)
    5312                 :            :                         cpumask_copy(wq_unbound_cpumask, saved_cpumask);
    5313                 :            : 
    5314                 :            :                 apply_wqattrs_unlock();
    5315                 :            :         }
    5316                 :            : 
    5317                 :            :         free_cpumask_var(saved_cpumask);
    5318                 :            :         return ret;
    5319                 :            : }
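/*
 * Illustrative sketch (not part of workqueue.c): restricting all unbound
 * workqueues to a caller-supplied CPU list, roughly what the
 * /sys/devices/virtual/workqueue/cpumask store handler does.
 * "restrict_unbound_wqs" is a hypothetical name.
 */
static int restrict_unbound_wqs(const char *cpulist)
{
	cpumask_var_t mask;
	int ret;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	ret = cpulist_parse(cpulist, mask);
	if (!ret)
		ret = workqueue_set_unbound_cpumask(mask);

	free_cpumask_var(mask);
	return ret;
}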
    5320                 :            : 
    5321                 :            : #ifdef CONFIG_SYSFS
    5322                 :            : /*
    5323                 :            :  * Workqueues with the WQ_SYSFS flag set are visible to userland via
    5324                 :            :  * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the
    5325                 :            :  * following attributes.
    5326                 :            :  *
    5327                 :            :  *  per_cpu     RO bool : whether the workqueue is per-cpu or unbound
    5328                 :            :  *  max_active  RW int  : maximum number of in-flight work items
    5329                 :            :  *
    5330                 :            :  * Unbound workqueues have the following extra attributes.
    5331                 :            :  *
    5332                 :            :  *  pool_ids    RO int  : the associated pool IDs for each node
    5333                 :            :  *  nice        RW int  : nice value of the workers
    5334                 :            :  *  cpumask     RW mask : bitmask of allowed CPUs for the workers
    5335                 :            :  *  numa        RW bool : whether NUMA affinity is enabled
    5336                 :            :  */
    5337                 :            : struct wq_device {
    5338                 :            :         struct workqueue_struct         *wq;
    5339                 :            :         struct device                   dev;
    5340                 :            : };
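/*
 * Illustrative sketch (not part of workqueue.c): creating a workqueue that
 * exposes the attributes listed above under
 * /sys/bus/workqueue/devices/my_wq/.  "my_wq" is a hypothetical name.
 */
static struct workqueue_struct *my_wq;

static int __init my_wq_init(void)
{
	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_SYSFS, 0);
	if (!my_wq)
		return -ENOMEM;

	/* nice, cpumask and numa are now tunable from userspace */
	return 0;
}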
    5341                 :            : 
    5342                 :            : static struct workqueue_struct *dev_to_wq(struct device *dev)
    5343                 :            : {
    5344                 :            :         struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
    5345                 :            : 
    5346                 :          0 :         return wq_dev->wq;
    5347                 :            : }
    5348                 :            : 
    5349                 :          0 : static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
    5350                 :            :                             char *buf)
    5351                 :            : {
    5352                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5353                 :            : 
    5354                 :          0 :         return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
    5355                 :            : }
    5356                 :            : static DEVICE_ATTR_RO(per_cpu);
    5357                 :            : 
    5358                 :          0 : static ssize_t max_active_show(struct device *dev,
    5359                 :            :                                struct device_attribute *attr, char *buf)
    5360                 :            : {
    5361                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5362                 :            : 
    5363                 :          0 :         return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
    5364                 :            : }
    5365                 :            : 
    5366                 :          0 : static ssize_t max_active_store(struct device *dev,
    5367                 :            :                                 struct device_attribute *attr, const char *buf,
    5368                 :            :                                 size_t count)
    5369                 :            : {
    5370                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5371                 :            :         int val;
    5372                 :            : 
    5373                 :          0 :         if (sscanf(buf, "%d", &val) != 1 || val <= 0)
    5374                 :            :                 return -EINVAL;
    5375                 :            : 
    5376                 :          0 :         workqueue_set_max_active(wq, val);
    5377                 :          0 :         return count;
    5378                 :            : }
    5379                 :            : static DEVICE_ATTR_RW(max_active);
    5380                 :            : 
    5381                 :            : static struct attribute *wq_sysfs_attrs[] = {
    5382                 :            :         &dev_attr_per_cpu.attr,
    5383                 :            :         &dev_attr_max_active.attr,
    5384                 :            :         NULL,
    5385                 :            : };
    5386                 :            : ATTRIBUTE_GROUPS(wq_sysfs);
    5387                 :            : 
    5388                 :          0 : static ssize_t wq_pool_ids_show(struct device *dev,
    5389                 :            :                                 struct device_attribute *attr, char *buf)
    5390                 :            : {
    5391                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5392                 :            :         const char *delim = "";
    5393                 :            :         int node, written = 0;
    5394                 :            : 
    5395                 :            :         get_online_cpus();
    5396                 :            :         rcu_read_lock();
    5397                 :          0 :         for_each_node(node) {
    5398                 :          0 :                 written += scnprintf(buf + written, PAGE_SIZE - written,
    5399                 :            :                                      "%s%d:%d", delim, node,
    5400                 :          0 :                                      unbound_pwq_by_node(wq, node)->pool->id);
    5401                 :            :                 delim = " ";
    5402                 :            :         }
    5403                 :          0 :         written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
    5404                 :            :         rcu_read_unlock();
    5405                 :            :         put_online_cpus();
    5406                 :            : 
    5407                 :          0 :         return written;
    5408                 :            : }
    5409                 :            : 
    5410                 :          0 : static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
    5411                 :            :                             char *buf)
    5412                 :            : {
    5413                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5414                 :            :         int written;
    5415                 :            : 
    5416                 :          0 :         mutex_lock(&wq->mutex);
    5417                 :          0 :         written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
    5418                 :          0 :         mutex_unlock(&wq->mutex);
    5419                 :            : 
    5420                 :          0 :         return written;
    5421                 :            : }
    5422                 :            : 
    5423                 :            : /* prepare workqueue_attrs for sysfs store operations */
    5424                 :          0 : static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
    5425                 :            : {
    5426                 :            :         struct workqueue_attrs *attrs;
    5427                 :            : 
    5428                 :            :         lockdep_assert_held(&wq_pool_mutex);
    5429                 :            : 
    5430                 :          0 :         attrs = alloc_workqueue_attrs();
    5431                 :          0 :         if (!attrs)
    5432                 :            :                 return NULL;
    5433                 :            : 
    5434                 :          0 :         copy_workqueue_attrs(attrs, wq->unbound_attrs);
    5435                 :          0 :         return attrs;
    5436                 :            : }
    5437                 :            : 
    5438                 :          0 : static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
    5439                 :            :                              const char *buf, size_t count)
    5440                 :            : {
    5441                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5442                 :            :         struct workqueue_attrs *attrs;
    5443                 :            :         int ret = -ENOMEM;
    5444                 :            : 
    5445                 :            :         apply_wqattrs_lock();
    5446                 :            : 
    5447                 :          0 :         attrs = wq_sysfs_prep_attrs(wq);
    5448                 :          0 :         if (!attrs)
    5449                 :            :                 goto out_unlock;
    5450                 :            : 
    5451                 :          0 :         if (sscanf(buf, "%d", &attrs->nice) == 1 &&
    5452                 :          0 :             attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
    5453                 :          0 :                 ret = apply_workqueue_attrs_locked(wq, attrs);
    5454                 :            :         else
    5455                 :            :                 ret = -EINVAL;
    5456                 :            : 
    5457                 :            : out_unlock:
    5458                 :            :         apply_wqattrs_unlock();
    5459                 :            :         free_workqueue_attrs(attrs);
    5460                 :          0 :         return ret ?: count;
    5461                 :            : }
    5462                 :            : 
    5463                 :          0 : static ssize_t wq_cpumask_show(struct device *dev,
    5464                 :            :                                struct device_attribute *attr, char *buf)
    5465                 :            : {
    5466                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5467                 :            :         int written;
    5468                 :            : 
    5469                 :          0 :         mutex_lock(&wq->mutex);
    5470                 :          0 :         written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
    5471                 :          0 :                             cpumask_pr_args(wq->unbound_attrs->cpumask));
    5472                 :          0 :         mutex_unlock(&wq->mutex);
    5473                 :          0 :         return written;
    5474                 :            : }
    5475                 :            : 
    5476                 :          0 : static ssize_t wq_cpumask_store(struct device *dev,
    5477                 :            :                                 struct device_attribute *attr,
    5478                 :            :                                 const char *buf, size_t count)
    5479                 :            : {
    5480                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5481                 :            :         struct workqueue_attrs *attrs;
    5482                 :            :         int ret = -ENOMEM;
    5483                 :            : 
    5484                 :            :         apply_wqattrs_lock();
    5485                 :            : 
    5486                 :          0 :         attrs = wq_sysfs_prep_attrs(wq);
    5487                 :          0 :         if (!attrs)
    5488                 :            :                 goto out_unlock;
    5489                 :            : 
    5490                 :          0 :         ret = cpumask_parse(buf, attrs->cpumask);
    5491                 :          0 :         if (!ret)
    5492                 :          0 :                 ret = apply_workqueue_attrs_locked(wq, attrs);
    5493                 :            : 
    5494                 :            : out_unlock:
    5495                 :            :         apply_wqattrs_unlock();
    5496                 :            :         free_workqueue_attrs(attrs);
    5497                 :          0 :         return ret ?: count;
    5498                 :            : }
    5499                 :            : 
    5500                 :          0 : static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
    5501                 :            :                             char *buf)
    5502                 :            : {
    5503                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5504                 :            :         int written;
    5505                 :            : 
    5506                 :          0 :         mutex_lock(&wq->mutex);
    5507                 :          0 :         written = scnprintf(buf, PAGE_SIZE, "%d\n",
    5508                 :          0 :                             !wq->unbound_attrs->no_numa);
    5509                 :          0 :         mutex_unlock(&wq->mutex);
    5510                 :            : 
    5511                 :          0 :         return written;
    5512                 :            : }
    5513                 :            : 
    5514                 :          0 : static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
    5515                 :            :                              const char *buf, size_t count)
    5516                 :            : {
    5517                 :            :         struct workqueue_struct *wq = dev_to_wq(dev);
    5518                 :            :         struct workqueue_attrs *attrs;
    5519                 :            :         int v, ret = -ENOMEM;
    5520                 :            : 
    5521                 :            :         apply_wqattrs_lock();
    5522                 :            : 
    5523                 :          0 :         attrs = wq_sysfs_prep_attrs(wq);
    5524                 :          0 :         if (!attrs)
    5525                 :            :                 goto out_unlock;
    5526                 :            : 
    5527                 :            :         ret = -EINVAL;
    5528                 :          0 :         if (sscanf(buf, "%d", &v) == 1) {
    5529                 :          0 :                 attrs->no_numa = !v;
    5530                 :          0 :                 ret = apply_workqueue_attrs_locked(wq, attrs);
    5531                 :            :         }
    5532                 :            : 
    5533                 :            : out_unlock:
    5534                 :            :         apply_wqattrs_unlock();
    5535                 :            :         free_workqueue_attrs(attrs);
    5536                 :          0 :         return ret ?: count;
    5537                 :            : }
    5538                 :            : 
    5539                 :            : static struct device_attribute wq_sysfs_unbound_attrs[] = {
    5540                 :            :         __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
    5541                 :            :         __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
    5542                 :            :         __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
    5543                 :            :         __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
    5544                 :            :         __ATTR_NULL,
    5545                 :            : };
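/*
 * Userland usage sketch (not part of this file): the store callbacks above
 * take plain decimal and hex-bitmap strings, so the unbound attributes can
 * be tuned with ordinary writes to the files created from this table.
 * "writeback" is only an example of a WQ_SYSFS workqueue name; which names
 * exist depends on the running kernel.
 *
 *        #include <stdio.h>
 *
 *        int main(void)
 *        {
 *                FILE *f = fopen("/sys/bus/workqueue/devices/writeback/nice", "w");
 *
 *                if (!f)
 *                        return 1;
 *                fprintf(f, "-5\n");  // wq_nice_store() checks MIN_NICE..MAX_NICE
 *                return fclose(f) ? 1 : 0;
 *        }
 */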
    5546                 :            : 
    5547                 :            : static struct bus_type wq_subsys = {
    5548                 :            :         .name                           = "workqueue",
    5549                 :            :         .dev_groups                     = wq_sysfs_groups,
    5550                 :            : };
    5551                 :            : 
    5552                 :          0 : static ssize_t wq_unbound_cpumask_show(struct device *dev,
    5553                 :            :                 struct device_attribute *attr, char *buf)
    5554                 :            : {
    5555                 :            :         int written;
    5556                 :            : 
    5557                 :          0 :         mutex_lock(&wq_pool_mutex);
    5558                 :          0 :         written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
    5559                 :            :                             cpumask_pr_args(wq_unbound_cpumask));
    5560                 :          0 :         mutex_unlock(&wq_pool_mutex);
    5561                 :            : 
    5562                 :          0 :         return written;
    5563                 :            : }
    5564                 :            : 
    5565                 :          0 : static ssize_t wq_unbound_cpumask_store(struct device *dev,
    5566                 :            :                 struct device_attribute *attr, const char *buf, size_t count)
    5567                 :            : {
    5568                 :            :         cpumask_var_t cpumask;
    5569                 :            :         int ret;
    5570                 :            : 
    5571                 :            :         if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
    5572                 :            :                 return -ENOMEM;
    5573                 :            : 
    5574                 :          0 :         ret = cpumask_parse(buf, cpumask);
    5575                 :          0 :         if (!ret)
    5576                 :          0 :                 ret = workqueue_set_unbound_cpumask(cpumask);
    5577                 :            : 
    5578                 :            :         free_cpumask_var(cpumask);
    5579                 :          0 :         return ret ? ret : count;
    5580                 :            : }
    5581                 :            : 
    5582                 :            : static struct device_attribute wq_sysfs_cpumask_attr =
    5583                 :            :         __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
    5584                 :            :                wq_unbound_cpumask_store);
    5585                 :            : 
    5586                 :          3 : static int __init wq_sysfs_init(void)
    5587                 :            : {
    5588                 :            :         int err;
    5589                 :            : 
    5590                 :          3 :         err = subsys_virtual_register(&wq_subsys, NULL);
    5591                 :          3 :         if (err)
    5592                 :            :                 return err;
    5593                 :            : 
    5594                 :          3 :         return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
    5595                 :            : }
    5596                 :            : core_initcall(wq_sysfs_init);
    5597                 :            : 
    5598                 :          0 : static void wq_device_release(struct device *dev)
    5599                 :            : {
    5600                 :          0 :         struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
    5601                 :            : 
    5602                 :          0 :         kfree(wq_dev);
    5603                 :          0 : }
    5604                 :            : 
    5605                 :            : /**
    5606                 :            :  * workqueue_sysfs_register - make a workqueue visible in sysfs
    5607                 :            :  * @wq: the workqueue to register
    5608                 :            :  *
    5609                 :            :  * Expose @wq in sysfs under /sys/bus/workqueue/devices.
    5610                 :            :  * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set,
    5611                 :            :  * which is the preferred method.
    5612                 :            :  *
    5613                 :            :  * A workqueue user should use this function directly iff it wants to apply
    5614                 :            :  * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
    5615                 :            :  * apply_workqueue_attrs() may race against userland updating the
    5616                 :            :  * attributes.
    5617                 :            :  *
    5618                 :            :  * Return: 0 on success, -errno on failure.
    5619                 :            :  */
    5620                 :          3 : int workqueue_sysfs_register(struct workqueue_struct *wq)
    5621                 :            : {
    5622                 :            :         struct wq_device *wq_dev;
    5623                 :            :         int ret;
    5624                 :            : 
    5625                 :            :         /*
    5626                 :            :          * Adjusting max_active or creating new pwqs by applying
    5627                 :            :          * attributes breaks ordering guarantee.  Disallow exposing ordered
    5628                 :            :          * workqueues.
    5629                 :            :          */
    5630                 :          3 :         if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
    5631                 :            :                 return -EINVAL;
    5632                 :            : 
    5633                 :          3 :         wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
    5634                 :          3 :         if (!wq_dev)
    5635                 :            :                 return -ENOMEM;
    5636                 :            : 
    5637                 :          3 :         wq_dev->wq = wq;
    5638                 :          3 :         wq_dev->dev.bus = &wq_subsys;
    5639                 :          3 :         wq_dev->dev.release = wq_device_release;
    5640                 :          3 :         dev_set_name(&wq_dev->dev, "%s", wq->name);
    5641                 :            : 
    5642                 :            :         /*
    5643                 :            :          * unbound_attrs are created separately.  Suppress uevent until
    5644                 :            :          * everything is ready.
    5645                 :            :          */
    5646                 :            :         dev_set_uevent_suppress(&wq_dev->dev, true);
    5647                 :            : 
    5648                 :          3 :         ret = device_register(&wq_dev->dev);
    5649                 :          3 :         if (ret) {
    5650                 :          0 :                 put_device(&wq_dev->dev);
    5651                 :          0 :                 wq->wq_dev = NULL;
    5652                 :          0 :                 return ret;
    5653                 :            :         }
    5654                 :            : 
    5655                 :          3 :         if (wq->flags & WQ_UNBOUND) {
    5656                 :            :                 struct device_attribute *attr;
    5657                 :            : 
    5658                 :          3 :                 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
    5659                 :          3 :                         ret = device_create_file(&wq_dev->dev, attr);
    5660                 :          3 :                         if (ret) {
    5661                 :          0 :                                 device_unregister(&wq_dev->dev);
    5662                 :          0 :                                 wq->wq_dev = NULL;
    5663                 :          0 :                                 return ret;
    5664                 :            :                         }
    5665                 :            :                 }
    5666                 :            :         }
    5667                 :            : 
    5668                 :            :         dev_set_uevent_suppress(&wq_dev->dev, false);
    5669                 :          3 :         kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
    5670                 :          3 :         return 0;
    5671                 :            : }
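/*
 * Usage sketch for the "apply attrs first, register later" pattern that the
 * kernel-doc above describes; every name other than the workqueue APIs is
 * hypothetical and error handling is abbreviated.
 *
 *        struct workqueue_attrs *attrs;
 *        struct workqueue_struct *wq;
 *        int ret = -ENOMEM;
 *
 *        wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);
 *        attrs = alloc_workqueue_attrs();
 *        if (wq && attrs) {
 *                attrs->nice = -5;
 *                ret = apply_workqueue_attrs(wq, attrs);
 *                if (!ret)
 *                        ret = workqueue_sysfs_register(wq);
 *        }
 *        free_workqueue_attrs(attrs);
 */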
    5672                 :            : 
    5673                 :            : /**
    5674                 :            :  * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
    5675                 :            :  * @wq: the workqueue to unregister
    5676                 :            :  *
    5677                 :            :  * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister it.
    5678                 :            :  */
    5679                 :            : static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
    5680                 :            : {
    5681                 :          2 :         struct wq_device *wq_dev = wq->wq_dev;
    5682                 :            : 
    5683                 :          2 :         if (!wq->wq_dev)
    5684                 :            :                 return;
    5685                 :            : 
    5686                 :          0 :         wq->wq_dev = NULL;
    5687                 :          0 :         device_unregister(&wq_dev->dev);
    5688                 :            : }
    5689                 :            : #else   /* CONFIG_SYSFS */
    5690                 :            : static void workqueue_sysfs_unregister(struct workqueue_struct *wq)     { }
    5691                 :            : #endif  /* CONFIG_SYSFS */
    5692                 :            : 
    5693                 :            : /*
    5694                 :            :  * Workqueue watchdog.
    5695                 :            :  *
    5696                 :            :  * Stalls may be caused by various bugs - a missing WQ_MEM_RECLAIM, an illegal
    5697                 :            :  * flush dependency, or a concurrency-managed work item which stays RUNNING
    5698                 :            :  * indefinitely.  Workqueue stalls can be very difficult to debug as the
    5699                 :            :  * usual warning mechanisms don't trigger and internal workqueue state is
    5700                 :            :  * largely opaque.
    5701                 :            :  *
    5702                 :            :  * Workqueue watchdog monitors all worker pools periodically and dumps
    5703                 :            :  * state if some pools fail to make forward progress for a while, where
    5704                 :            :  * forward progress is defined as the first item on ->worklist changing.
    5705                 :            :  *
    5706                 :            :  * This mechanism is controlled through the kernel parameter
    5707                 :            :  * "workqueue.watchdog_thresh", which can be updated at runtime through the
    5708                 :            :  * corresponding sysfs parameter file.
    5709                 :            :  */
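/*
 * Usage sketch (the parameter paths below are the standard ones for a
 * built-in module_param and are assumed, not shown in this file): the
 * threshold is in seconds and 0 disables the watchdog.
 *
 *        workqueue.watchdog_thresh=60        # on the kernel command line
 *        echo 60 > /sys/module/workqueue/parameters/watchdog_thresh
 */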
    5710                 :            : #ifdef CONFIG_WQ_WATCHDOG
    5711                 :            : 
    5712                 :            : static unsigned long wq_watchdog_thresh = 30;
    5713                 :            : static struct timer_list wq_watchdog_timer;
    5714                 :            : 
    5715                 :            : static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
    5716                 :            : static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
    5717                 :            : 
    5718                 :            : static void wq_watchdog_reset_touched(void)
    5719                 :            : {
    5720                 :            :         int cpu;
    5721                 :            : 
    5722                 :            :         wq_watchdog_touched = jiffies;
    5723                 :            :         for_each_possible_cpu(cpu)
    5724                 :            :                 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
    5725                 :            : }
    5726                 :            : 
    5727                 :            : static void wq_watchdog_timer_fn(struct timer_list *unused)
    5728                 :            : {
    5729                 :            :         unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
    5730                 :            :         bool lockup_detected = false;
    5731                 :            :         struct worker_pool *pool;
    5732                 :            :         int pi;
    5733                 :            : 
    5734                 :            :         if (!thresh)
    5735                 :            :                 return;
    5736                 :            : 
    5737                 :            :         rcu_read_lock();
    5738                 :            : 
    5739                 :            :         for_each_pool(pool, pi) {
    5740                 :            :                 unsigned long pool_ts, touched, ts;
    5741                 :            : 
    5742                 :            :                 if (list_empty(&pool->worklist))
    5743                 :            :                         continue;
    5744                 :            : 
    5745                 :            :                 /* get the latest of pool and touched timestamps */
    5746                 :            :                 pool_ts = READ_ONCE(pool->watchdog_ts);
    5747                 :            :                 touched = READ_ONCE(wq_watchdog_touched);
    5748                 :            : 
    5749                 :            :                 if (time_after(pool_ts, touched))
    5750                 :            :                         ts = pool_ts;
    5751                 :            :                 else
    5752                 :            :                         ts = touched;
    5753                 :            : 
    5754                 :            :                 if (pool->cpu >= 0) {
    5755                 :            :                         unsigned long cpu_touched =
    5756                 :            :                                 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
    5757                 :            :                                                   pool->cpu));
    5758                 :            :                         if (time_after(cpu_touched, ts))
    5759                 :            :                                 ts = cpu_touched;
    5760                 :            :                 }
    5761                 :            : 
    5762                 :            :                 /* did we stall? */
    5763                 :            :                 if (time_after(jiffies, ts + thresh)) {
    5764                 :            :                         lockup_detected = true;
    5765                 :            :                         pr_emerg("BUG: workqueue lockup - pool");
    5766                 :            :                         pr_cont_pool_info(pool);
    5767                 :            :                         pr_cont(" stuck for %us!\n",
    5768                 :            :                                 jiffies_to_msecs(jiffies - pool_ts) / 1000);
    5769                 :            :                 }
    5770                 :            :         }
    5771                 :            : 
    5772                 :            :         rcu_read_unlock();
    5773                 :            : 
    5774                 :            :         if (lockup_detected)
    5775                 :            :                 show_workqueue_state();
    5776                 :            : 
    5777                 :            :         wq_watchdog_reset_touched();
    5778                 :            :         mod_timer(&wq_watchdog_timer, jiffies + thresh);
    5779                 :            : }
    5780                 :            : 
    5781                 :            : notrace void wq_watchdog_touch(int cpu)
    5782                 :            : {
    5783                 :            :         if (cpu >= 0)
    5784                 :            :                 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
    5785                 :            :         else
    5786                 :            :                 wq_watchdog_touched = jiffies;
    5787                 :            : }
    5788                 :            : 
    5789                 :            : static void wq_watchdog_set_thresh(unsigned long thresh)
    5790                 :            : {
    5791                 :            :         wq_watchdog_thresh = 0;
    5792                 :            :         del_timer_sync(&wq_watchdog_timer);
    5793                 :            : 
    5794                 :            :         if (thresh) {
    5795                 :            :                 wq_watchdog_thresh = thresh;
    5796                 :            :                 wq_watchdog_reset_touched();
    5797                 :            :                 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
    5798                 :            :         }
    5799                 :            : }
    5800                 :            : 
    5801                 :            : static int wq_watchdog_param_set_thresh(const char *val,
    5802                 :            :                                         const struct kernel_param *kp)
    5803                 :            : {
    5804                 :            :         unsigned long thresh;
    5805                 :            :         int ret;
    5806                 :            : 
    5807                 :            :         ret = kstrtoul(val, 0, &thresh);
    5808                 :            :         if (ret)
    5809                 :            :                 return ret;
    5810                 :            : 
    5811                 :            :         if (system_wq)
    5812                 :            :                 wq_watchdog_set_thresh(thresh);
    5813                 :            :         else
    5814                 :            :                 wq_watchdog_thresh = thresh;
    5815                 :            : 
    5816                 :            :         return 0;
    5817                 :            : }
    5818                 :            : 
    5819                 :            : static const struct kernel_param_ops wq_watchdog_thresh_ops = {
    5820                 :            :         .set    = wq_watchdog_param_set_thresh,
    5821                 :            :         .get    = param_get_ulong,
    5822                 :            : };
    5823                 :            : 
    5824                 :            : module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
    5825                 :            :                 0644);
    5826                 :            : 
    5827                 :            : static void wq_watchdog_init(void)
    5828                 :            : {
    5829                 :            :         timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
    5830                 :            :         wq_watchdog_set_thresh(wq_watchdog_thresh);
    5831                 :            : }
    5832                 :            : 
    5833                 :            : #else   /* CONFIG_WQ_WATCHDOG */
    5834                 :            : 
    5835                 :            : static inline void wq_watchdog_init(void) { }
    5836                 :            : 
    5837                 :            : #endif  /* CONFIG_WQ_WATCHDOG */
    5838                 :            : 
    5839                 :            : static void __init wq_numa_init(void)
    5840                 :            : {
    5841                 :            :         cpumask_var_t *tbl;
    5842                 :            :         int node, cpu;
    5843                 :            : 
    5844                 :            :         if (num_possible_nodes() <= 1)
    5845                 :            :                 return;
    5846                 :            : 
    5847                 :            :         if (wq_disable_numa) {
    5848                 :            :                 pr_info("workqueue: NUMA affinity support disabled\n");
    5849                 :            :                 return;
    5850                 :            :         }
    5851                 :            : 
    5852                 :            :         wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
    5853                 :            :         BUG_ON(!wq_update_unbound_numa_attrs_buf);
    5854                 :            : 
    5855                 :            :         /*
    5856                 :            :          * We want masks of the possible CPUs of each node, which aren't readily
    5857                 :            :          * available.  Build them from cpu_to_node(), which should have been
    5858                 :            :          * fully initialized by now.
    5859                 :            :          */
    5860                 :            :         tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
    5861                 :            :         BUG_ON(!tbl);
    5862                 :            : 
    5863                 :            :         for_each_node(node)
    5864                 :            :                 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
    5865                 :            :                                 node_online(node) ? node : NUMA_NO_NODE));
    5866                 :            : 
    5867                 :            :         for_each_possible_cpu(cpu) {
    5868                 :            :                 node = cpu_to_node(cpu);
    5869                 :            :                 if (WARN_ON(node == NUMA_NO_NODE)) {
    5870                 :            :                         pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
    5871                 :            :                         /* happens iff arch is bonkers, let's just proceed */
    5872                 :            :                         return;
    5873                 :            :                 }
    5874                 :            :                 cpumask_set_cpu(cpu, tbl[node]);
    5875                 :            :         }
    5876                 :            : 
    5877                 :            :         wq_numa_possible_cpumask = tbl;
    5878                 :            :         wq_numa_enabled = true;
    5879                 :            : }
    5880                 :            : 
    5881                 :            : /**
    5882                 :            :  * workqueue_init_early - early init for workqueue subsystem
    5883                 :            :  *
    5884                 :            :  * This is the first half of two-staged workqueue subsystem initialization
    5885                 :            :  * and invoked as soon as the bare basics - memory allocation, cpumasks and
    5886                 :            :  * idr - are up.  It sets up all the data structures and system workqueues
    5887                 :            :  * and allows early boot code to create workqueues and queue/cancel work
    5888                 :            :  * items.  Actual work item execution starts only after kthreads can be
    5889                 :            :  * created and scheduled right before early initcalls.
    5890                 :            :  */
    5891                 :          3 : int __init workqueue_init_early(void)
    5892                 :            : {
    5893                 :          3 :         int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
    5894                 :            :         int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
    5895                 :            :         int i, cpu;
    5896                 :            : 
    5897                 :            :         WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
    5898                 :            : 
    5899                 :            :         BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
    5900                 :          3 :         cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
    5901                 :            : 
    5902                 :          3 :         pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
    5903                 :            : 
    5904                 :            :         /* initialize CPU pools */
    5905                 :          3 :         for_each_possible_cpu(cpu) {
    5906                 :            :                 struct worker_pool *pool;
    5907                 :            : 
    5908                 :            :                 i = 0;
    5909                 :          3 :                 for_each_cpu_worker_pool(pool, cpu) {
    5910                 :          3 :                         BUG_ON(init_worker_pool(pool));
    5911                 :          3 :                         pool->cpu = cpu;
    5912                 :          3 :                         cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
    5913                 :          3 :                         pool->attrs->nice = std_nice[i++];
    5914                 :          3 :                         pool->node = cpu_to_node(cpu);
    5915                 :            : 
    5916                 :            :                         /* alloc pool ID */
    5917                 :          3 :                         mutex_lock(&wq_pool_mutex);
    5918                 :          3 :                         BUG_ON(worker_pool_assign_id(pool));
    5919                 :          3 :                         mutex_unlock(&wq_pool_mutex);
    5920                 :            :                 }
    5921                 :            :         }
    5922                 :            : 
    5923                 :            :         /* create default unbound and ordered wq attrs */
    5924                 :          3 :         for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
    5925                 :            :                 struct workqueue_attrs *attrs;
    5926                 :            : 
    5927                 :          3 :                 BUG_ON(!(attrs = alloc_workqueue_attrs()));
    5928                 :          3 :                 attrs->nice = std_nice[i];
    5929                 :          3 :                 unbound_std_wq_attrs[i] = attrs;
    5930                 :            : 
    5931                 :            :                 /*
    5932                 :            :                  * An ordered wq should have only one pwq as ordering is
    5933                 :            :                  * guaranteed by max_active which is enforced by pwqs.
    5934                 :            :                  * Turn off NUMA so that dfl_pwq is used for all nodes.
    5935                 :            :                  */
    5936                 :          3 :                 BUG_ON(!(attrs = alloc_workqueue_attrs()));
    5937                 :          3 :                 attrs->nice = std_nice[i];
    5938                 :          3 :                 attrs->no_numa = true;
    5939                 :          3 :                 ordered_wq_attrs[i] = attrs;
    5940                 :            :         }
    5941                 :            : 
    5942                 :          3 :         system_wq = alloc_workqueue("events", 0, 0);
    5943                 :          3 :         system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
    5944                 :          3 :         system_long_wq = alloc_workqueue("events_long", 0, 0);
    5945                 :          3 :         system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
    5946                 :          3 :                                             WQ_UNBOUND_MAX_ACTIVE);
    5947                 :          3 :         system_freezable_wq = alloc_workqueue("events_freezable",
    5948                 :            :                                               WQ_FREEZABLE, 0);
    5949                 :          3 :         system_power_efficient_wq = alloc_workqueue("events_power_efficient",
    5950                 :            :                                               WQ_POWER_EFFICIENT, 0);
    5951                 :          3 :         system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
    5952                 :            :                                               WQ_FREEZABLE | WQ_POWER_EFFICIENT,
    5953                 :            :                                               0);
    5954                 :          3 :         BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
    5955                 :            :                !system_unbound_wq || !system_freezable_wq ||
    5956                 :            :                !system_power_efficient_wq ||
    5957                 :            :                !system_freezable_power_efficient_wq);
    5958                 :            : 
    5959                 :          3 :         return 0;
    5960                 :            : }
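/*
 * Illustrative sketch (hypothetical names): once workqueue_init_early() has
 * run, early boot code may already define and queue work items; they simply
 * sit on the pool worklists until workqueue_init() below creates the initial
 * kworkers.
 *
 *        static void early_fn(struct work_struct *work)
 *        {
 *                pr_info("executes only after workqueue_init()\n");
 *        }
 *        static DECLARE_WORK(early_work, early_fn);
 *
 *        // queued after workqueue_init_early(), runs after workqueue_init()
 *        schedule_work(&early_work);
 */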
    5961                 :            : 
    5962                 :            : /**
    5963                 :            :  * workqueue_init - bring workqueue subsystem fully online
    5964                 :            :  *
    5965                 :            :  * This is the latter half of two-staged workqueue subsystem initialization
    5966                 :            :  * and invoked as soon as kthreads can be created and scheduled.
    5967                 :            :  * Workqueues have been created and work items queued on them, but there
    5968                 :            :  * are no kworkers executing the work items yet.  Populate the worker pools
    5969                 :            :  * with the initial workers and enable future kworker creations.
    5970                 :            :  */
    5971                 :          3 : int __init workqueue_init(void)
    5972                 :            : {
    5973                 :            :         struct workqueue_struct *wq;
    5974                 :            :         struct worker_pool *pool;
    5975                 :            :         int cpu, bkt;
    5976                 :            : 
    5977                 :            :         /*
    5978                 :            :          * It'd be simpler to initialize NUMA in workqueue_init_early() but
    5979                 :            :          * CPU to node mapping may not be available that early on some
    5980                 :            :          * archs such as power and arm64.  As the per-cpu pools created
    5981                 :            :          * previously could be missing their node hint, and the unbound
    5982                 :            :          * pools their NUMA affinity, fix them up.
    5983                 :            :          *
    5984                 :            :          * Also, while iterating workqueues, create rescuers if requested.
    5985                 :            :          */
    5986                 :            :         wq_numa_init();
    5987                 :            : 
    5988                 :          3 :         mutex_lock(&wq_pool_mutex);
    5989                 :            : 
    5990                 :          3 :         for_each_possible_cpu(cpu) {
    5991                 :          3 :                 for_each_cpu_worker_pool(pool, cpu) {
    5992                 :          3 :                         pool->node = cpu_to_node(cpu);
    5993                 :            :                 }
    5994                 :            :         }
    5995                 :            : 
    5996                 :          3 :         list_for_each_entry(wq, &workqueues, list) {
    5997                 :          3 :                 wq_update_unbound_numa(wq, smp_processor_id(), true);
    5998                 :          3 :                 WARN(init_rescuer(wq),
    5999                 :            :                      "workqueue: failed to create early rescuer for %s",
    6000                 :            :                      wq->name);
    6001                 :            :         }
    6002                 :            : 
    6003                 :          3 :         mutex_unlock(&wq_pool_mutex);
    6004                 :            : 
    6005                 :            :         /* create the initial workers */
    6006                 :          3 :         for_each_online_cpu(cpu) {
    6007                 :          3 :                 for_each_cpu_worker_pool(pool, cpu) {
    6008                 :          3 :                         pool->flags &= ~POOL_DISASSOCIATED;
    6009                 :          3 :                         BUG_ON(!create_worker(pool));
    6010                 :            :                 }
    6011                 :            :         }
    6012                 :            : 
    6013                 :          3 :         hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
    6014                 :          3 :                 BUG_ON(!create_worker(pool));
    6015                 :            : 
    6016                 :          3 :         wq_online = true;
    6017                 :            :         wq_watchdog_init();
    6018                 :            : 
    6019                 :          3 :         return 0;
    6020                 :            : }
    

Generated by: LCOV version 1.14