LCOV - Real - kernel/cgroup/cgroup.c

LCOV - code coverage report

Current view:	top level - kernel/cgroup - cgroup.c (source / functions)		Hit	Total	Coverage
Test:	Real	Lines:	1334	1909	69.9 %
Date:	2020-10-17 15:46:43	Functions:	1	191	0.5 %
Legend:	Neither, QEMU, Real, Both	Branches:	0	0	-

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :  *  Generic process-grouping system.
       3                 :            :  *
       4                 :            :  *  Based originally on the cpuset system, extracted by Paul Menage
       5                 :            :  *  Copyright (C) 2006 Google, Inc
       6                 :            :  *
       7                 :            :  *  Notifications support
       8                 :            :  *  Copyright (C) 2009 Nokia Corporation
       9                 :            :  *  Author: Kirill A. Shutemov
      10                 :            :  *
      11                 :            :  *  Copyright notices from the original cpuset code:
      12                 :            :  *  --------------------------------------------------
      13                 :            :  *  Copyright (C) 2003 BULL SA.
      14                 :            :  *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
      15                 :            :  *
      16                 :            :  *  Portions derived from Patrick Mochel's sysfs code.
      17                 :            :  *  sysfs is Copyright (c) 2001-3 Patrick Mochel
      18                 :            :  *
      19                 :            :  *  2003-10-10 Written by Simon Derr.
      20                 :            :  *  2003-10-22 Updates by Stephen Hemminger.
      21                 :            :  *  2004 May-July Rework by Paul Jackson.
      22                 :            :  *  ---------------------------------------------------
      23                 :            :  *
      24                 :            :  *  This file is subject to the terms and conditions of the GNU General Public
      25                 :            :  *  License.  See the file COPYING in the main directory of the Linux
      26                 :            :  *  distribution for more details.
      27                 :            :  */
      28                 :            : 
      29                 :            : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      30                 :            : 
      31                 :            : #include "cgroup-internal.h"
      32                 :            : 
      33                 :            : #include <linux/cred.h>
      34                 :            : #include <linux/errno.h>
      35                 :            : #include <linux/init_task.h>
      36                 :            : #include <linux/kernel.h>
      37                 :            : #include <linux/magic.h>
      38                 :            : #include <linux/mutex.h>
      39                 :            : #include <linux/mount.h>
      40                 :            : #include <linux/pagemap.h>
      41                 :            : #include <linux/proc_fs.h>
      42                 :            : #include <linux/rcupdate.h>
      43                 :            : #include <linux/sched.h>
      44                 :            : #include <linux/sched/task.h>
      45                 :            : #include <linux/slab.h>
      46                 :            : #include <linux/spinlock.h>
      47                 :            : #include <linux/percpu-rwsem.h>
      48                 :            : #include <linux/string.h>
      49                 :            : #include <linux/hashtable.h>
      50                 :            : #include <linux/idr.h>
      51                 :            : #include <linux/kthread.h>
      52                 :            : #include <linux/atomic.h>
      53                 :            : #include <linux/cpuset.h>
      54                 :            : #include <linux/proc_ns.h>
      55                 :            : #include <linux/nsproxy.h>
      56                 :            : #include <linux/file.h>
      57                 :            : #include <linux/fs_parser.h>
      58                 :            : #include <linux/sched/cputime.h>
      59                 :            : #include <linux/psi.h>
      60                 :            : #include <net/sock.h>
      61                 :            : 
      62                 :            : #define CREATE_TRACE_POINTS
      63                 :            : #include <trace/events/cgroup.h>
      64                 :            : 
      65                 :            : #define CGROUP_FILE_NAME_MAX            (MAX_CGROUP_TYPE_NAMELEN +      \
      66                 :            :                                          MAX_CFTYPE_NAME + 2)
      67                 :            : /* let's not notify more than 100 times per second */
      68                 :            : #define CGROUP_FILE_NOTIFY_MIN_INTV     DIV_ROUND_UP(HZ, 100)
      69                 :            : 
      70                 :            : /*
      71                 :            :  * cgroup_mutex is the master lock.  Any modification to cgroup or its
      72                 :            :  * hierarchy must be performed while holding it.
      73                 :            :  *
      74                 :            :  * css_set_lock protects task->cgroups pointer, the list of css_set
      75                 :            :  * objects, and the chain of tasks off each css_set.
      76                 :            :  *
      77                 :            :  * These locks are exported if CONFIG_PROVE_RCU so that accessors in
      78                 :            :  * cgroup.h can use them for lockdep annotations.
      79                 :            :  */
      80                 :            : DEFINE_MUTEX(cgroup_mutex);
      81                 :            : DEFINE_SPINLOCK(css_set_lock);
      82                 :            : 
      83                 :            : #ifdef CONFIG_PROVE_RCU
      84                 :            : EXPORT_SYMBOL_GPL(cgroup_mutex);
      85                 :            : EXPORT_SYMBOL_GPL(css_set_lock);
      86                 :            : #endif
      87                 :            : 
      88                 :            : DEFINE_SPINLOCK(trace_cgroup_path_lock);
      89                 :            : char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
      90                 :            : bool cgroup_debug __read_mostly;
      91                 :            : 
      92                 :            : /*
      93                 :            :  * Protects cgroup_idr and css_idr so that IDs can be released without
      94                 :            :  * grabbing cgroup_mutex.
      95                 :            :  */
      96                 :            : static DEFINE_SPINLOCK(cgroup_idr_lock);
      97                 :            : 
      98                 :            : /*
      99                 :            :  * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
     100                 :            :  * against file removal/re-creation across css hiding.
     101                 :            :  */
     102                 :            : static DEFINE_SPINLOCK(cgroup_file_kn_lock);
     103                 :            : 
     104                 :            : DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
     105                 :            : 
     106                 :            : #define cgroup_assert_mutex_or_rcu_locked()                             \
     107                 :            :         RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
     108                 :            :                            !lockdep_is_held(&cgroup_mutex),         \
     109                 :            :                            "cgroup_mutex or RCU read lock required");
     110                 :            : 
     111                 :            : /*
     112                 :            :  * cgroup destruction makes heavy use of work items and there can be a lot
     113                 :            :  * of concurrent destructions.  Use a separate workqueue so that cgroup
     114                 :            :  * destruction work items don't end up filling up max_active of system_wq
     115                 :            :  * which may lead to deadlock.
     116                 :            :  */
     117                 :            : static struct workqueue_struct *cgroup_destroy_wq;
     118                 :            : 
     119                 :            : /* generate an array of cgroup subsystem pointers */
     120                 :            : #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
     121                 :            : struct cgroup_subsys *cgroup_subsys[] = {
     122                 :            : #include <linux/cgroup_subsys.h>
     123                 :            : };
     124                 :            : #undef SUBSYS
     125                 :            : 
     126                 :            : /* array of cgroup subsystem names */
     127                 :            : #define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
     128                 :            : static const char *cgroup_subsys_name[] = {
     129                 :            : #include <linux/cgroup_subsys.h>
     130                 :            : };
     131                 :            : #undef SUBSYS
     132                 :            : 
     133                 :            : /* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
     134                 :            : #define SUBSYS(_x)                                                              \
     135                 :            :         DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);                 \
     136                 :            :         DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);                  \
     137                 :            :         EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);                      \
     138                 :            :         EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
     139                 :            : #include <linux/cgroup_subsys.h>
     140                 :            : #undef SUBSYS
     141                 :            : 
     142                 :            : #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
     143                 :            : static struct static_key_true *cgroup_subsys_enabled_key[] = {
     144                 :            : #include <linux/cgroup_subsys.h>
     145                 :            : };
     146                 :            : #undef SUBSYS
     147                 :            : 
     148                 :            : #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
     149                 :            : static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
     150                 :            : #include <linux/cgroup_subsys.h>
     151                 :            : };
     152                 :            : #undef SUBSYS
     153                 :            : 
     154                 :            : static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
     155                 :            : 
     156                 :            : /*
     157                 :            :  * The default hierarchy, reserved for the subsystems that are otherwise
     158                 :            :  * unattached - it never has more than a single cgroup, and all tasks are
     159                 :            :  * part of that cgroup.
     160                 :            :  */
     161                 :            : struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
     162                 :            : EXPORT_SYMBOL_GPL(cgrp_dfl_root);
     163                 :            : 
     164                 :            : /*
     165                 :            :  * The default hierarchy always exists but is hidden until mounted for the
     166                 :            :  * first time.  This is for backward compatibility.
     167                 :            :  */
     168                 :            : static bool cgrp_dfl_visible;
     169                 :            : 
     170                 :            : /* some controllers are not supported in the default hierarchy */
     171                 :            : static u16 cgrp_dfl_inhibit_ss_mask;
     172                 :            : 
     173                 :            : /* some controllers are implicitly enabled on the default hierarchy */
     174                 :            : static u16 cgrp_dfl_implicit_ss_mask;
     175                 :            : 
     176                 :            : /* some controllers can be threaded on the default hierarchy */
     177                 :            : static u16 cgrp_dfl_threaded_ss_mask;
     178                 :            : 
     179                 :            : /* The list of hierarchy roots */
     180                 :            : LIST_HEAD(cgroup_roots);
     181                 :            : static int cgroup_root_count;
     182                 :            : 
     183                 :            : /* hierarchy ID allocation and mapping, protected by cgroup_mutex */
     184                 :            : static DEFINE_IDR(cgroup_hierarchy_idr);
     185                 :            : 
     186                 :            : /*
     187                 :            :  * Assign a monotonically increasing serial number to csses.  It guarantees
     188                 :            :  * cgroups with bigger numbers are newer than those with smaller numbers.
     189                 :            :  * Also, as csses are always appended to the parent's ->children list, it
     190                 :            :  * guarantees that sibling csses are always sorted in the ascending serial
     191                 :            :  * number order on the list.  Protected by cgroup_mutex.
     192                 :            :  */
     193                 :            : static u64 css_serial_nr_next = 1;
     194                 :            : 
     195                 :            : /*
     196                 :            :  * These bitmasks identify subsystems with specific features to avoid
     197                 :            :  * having to do iterative checks repeatedly.
     198                 :            :  */
     199                 :            : static u16 have_fork_callback __read_mostly;
     200                 :            : static u16 have_exit_callback __read_mostly;
     201                 :            : static u16 have_release_callback __read_mostly;
     202                 :            : static u16 have_canfork_callback __read_mostly;
     203                 :            : 
     204                 :            : /* cgroup namespace for init task */
     205                 :            : struct cgroup_namespace init_cgroup_ns = {
     206                 :            :         .count          = REFCOUNT_INIT(2),
     207                 :            :         .user_ns        = &init_user_ns,
     208                 :            :         .ns.ops         = &cgroupns_operations,
     209                 :            :         .ns.inum        = PROC_CGROUP_INIT_INO,
     210                 :            :         .root_cset      = &init_css_set,
     211                 :            : };
     212                 :            : 
     213                 :            : static struct file_system_type cgroup2_fs_type;
     214                 :            : static struct cftype cgroup_base_files[];
     215                 :            : 
     216                 :            : static int cgroup_apply_control(struct cgroup *cgrp);
     217                 :            : static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
     218                 :            : static void css_task_iter_skip(struct css_task_iter *it,
     219                 :            :                                struct task_struct *task);
     220                 :            : static int cgroup_destroy_locked(struct cgroup *cgrp);
     221                 :            : static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
     222                 :            :                                               struct cgroup_subsys *ss);
     223                 :            : static void css_release(struct percpu_ref *ref);
     224                 :            : static void kill_css(struct cgroup_subsys_state *css);
     225                 :            : static int cgroup_addrm_files(struct cgroup_subsys_state *css,
     226                 :            :                               struct cgroup *cgrp, struct cftype cfts[],
     227                 :            :                               bool is_add);
     228                 :            : 
     229                 :            : /**
     230                 :            :  * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
     231                 :            :  * @ssid: subsys ID of interest
     232                 :            :  *
     233                 :            :  * cgroup_subsys_enabled() can only be used with literal subsys names which
     234                 :            :  * is fine for individual subsystems but unsuitable for cgroup core.  This
      235                 :            :  * is a slower, static_key_enabled()-based test indexed by @ssid.
     236                 :            :  */
     237                 :          3 : bool cgroup_ssid_enabled(int ssid)
     238                 :            : {
     239                 :            :         if (CGROUP_SUBSYS_COUNT == 0)
     240                 :            :                 return false;
     241                 :            : 
     242                 :          3 :         return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
     243                 :            : }
     244                 :            : 
     245                 :            : /**
     246                 :            :  * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
     247                 :            :  * @cgrp: the cgroup of interest
     248                 :            :  *
     249                 :            :  * The default hierarchy is the v2 interface of cgroup and this function
     250                 :            :  * can be used to test whether a cgroup is on the default hierarchy for
      251                 :            :  * cases where a subsystem should behave differently depending on the
     252                 :            :  * interface version.
     253                 :            :  *
     254                 :            :  * The set of behaviors which change on the default hierarchy are still
     255                 :            :  * being determined and the mount option is prefixed with __DEVEL__.
     256                 :            :  *
     257                 :            :  * List of changed behaviors:
     258                 :            :  *
     259                 :            :  * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
     260                 :            :  *   and "name" are disallowed.
     261                 :            :  *
     262                 :            :  * - When mounting an existing superblock, mount options should match.
     263                 :            :  *
     264                 :            :  * - Remount is disallowed.
     265                 :            :  *
     266                 :            :  * - rename(2) is disallowed.
     267                 :            :  *
     268                 :            :  * - "tasks" is removed.  Everything should be at process granularity.  Use
     269                 :            :  *   "cgroup.procs" instead.
     270                 :            :  *
     271                 :            :  * - "cgroup.procs" is not sorted.  pids will be unique unless they got
      272                 :            :  *   recycled in between reads.
     273                 :            :  *
     274                 :            :  * - "release_agent" and "notify_on_release" are removed.  Replacement
     275                 :            :  *   notification mechanism will be implemented.
     276                 :            :  *
     277                 :            :  * - "cgroup.clone_children" is removed.
     278                 :            :  *
     279                 :            :  * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
     280                 :            :  *   and its descendants contain no task; otherwise, 1.  The file also
     281                 :            :  *   generates kernfs notification which can be monitored through poll and
     282                 :            :  *   [di]notify when the value of the file changes.
     283                 :            :  *
     284                 :            :  * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
     285                 :            :  *   take masks of ancestors with non-empty cpus/mems, instead of being
     286                 :            :  *   moved to an ancestor.
     287                 :            :  *
     288                 :            :  * - cpuset: a task can be moved into an empty cpuset, and again it takes
     289                 :            :  *   masks of ancestors.
     290                 :            :  *
     291                 :            :  * - memcg: use_hierarchy is on by default and the cgroup file for the flag
     292                 :            :  *   is not created.
     293                 :            :  *
     294                 :            :  * - blkcg: blk-throttle becomes properly hierarchical.
     295                 :            :  *
     296                 :            :  * - debug: disallowed on the default hierarchy.
     297                 :            :  */
     298                 :          0 : bool cgroup_on_dfl(const struct cgroup *cgrp)
     299                 :            : {
     300                 :          3 :         return cgrp->root == &cgrp_dfl_root;
     301                 :            : }
     302                 :            : 
     303                 :            : /* IDR wrappers which synchronize using cgroup_idr_lock */
     304                 :          3 : static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
     305                 :            :                             gfp_t gfp_mask)
     306                 :            : {
     307                 :            :         int ret;
     308                 :            : 
     309                 :          3 :         idr_preload(gfp_mask);
     310                 :            :         spin_lock_bh(&cgroup_idr_lock);
     311                 :          3 :         ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
     312                 :            :         spin_unlock_bh(&cgroup_idr_lock);
     313                 :            :         idr_preload_end();
     314                 :          3 :         return ret;
     315                 :            : }
     316                 :            : 
     317                 :          3 : static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
     318                 :            : {
     319                 :            :         void *ret;
     320                 :            : 
     321                 :            :         spin_lock_bh(&cgroup_idr_lock);
     322                 :          3 :         ret = idr_replace(idr, ptr, id);
     323                 :            :         spin_unlock_bh(&cgroup_idr_lock);
     324                 :          3 :         return ret;
     325                 :            : }
     326                 :            : 
     327                 :          3 : static void cgroup_idr_remove(struct idr *idr, int id)
     328                 :            : {
     329                 :            :         spin_lock_bh(&cgroup_idr_lock);
     330                 :          3 :         idr_remove(idr, id);
     331                 :            :         spin_unlock_bh(&cgroup_idr_lock);
     332                 :          3 : }
     333                 :            : 
     334                 :            : static bool cgroup_has_tasks(struct cgroup *cgrp)
     335                 :            : {
     336                 :          3 :         return cgrp->nr_populated_csets;
     337                 :            : }
     338                 :            : 
     339                 :          0 : bool cgroup_is_threaded(struct cgroup *cgrp)
     340                 :            : {
     341                 :          3 :         return cgrp->dom_cgrp != cgrp;
     342                 :            : }
     343                 :            : 
     344                 :            : /* can @cgrp host both domain and threaded children? */
     345                 :            : static bool cgroup_is_mixable(struct cgroup *cgrp)
     346                 :            : {
     347                 :            :         /*
     348                 :            :          * Root isn't under domain level resource control exempting it from
     349                 :            :          * the no-internal-process constraint, so it can serve as a thread
     350                 :            :          * root and a parent of resource domains at the same time.
     351                 :            :          */
     352                 :            :         return !cgroup_parent(cgrp);
     353                 :            : }
     354                 :            : 
     355                 :            : /* can @cgrp become a thread root? should always be true for a thread root */
     356                 :          3 : static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
     357                 :            : {
     358                 :            :         /* mixables don't care */
     359                 :          3 :         if (cgroup_is_mixable(cgrp))
     360                 :            :                 return true;
     361                 :            : 
     362                 :            :         /* domain roots can't be nested under threaded */
     363                 :          3 :         if (cgroup_is_threaded(cgrp))
     364                 :            :                 return false;
     365                 :            : 
     366                 :            :         /* can only have either domain or threaded children */
     367                 :          3 :         if (cgrp->nr_populated_domain_children)
     368                 :            :                 return false;
     369                 :            : 
     370                 :            :         /* and no domain controllers can be enabled */
     371                 :          3 :         if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
     372                 :            :                 return false;
     373                 :            : 
     374                 :          3 :         return true;
     375                 :            : }
     376                 :            : 
     377                 :            : /* is @cgrp root of a threaded subtree? */
     378                 :          0 : bool cgroup_is_thread_root(struct cgroup *cgrp)
     379                 :            : {
     380                 :            :         /* thread root should be a domain */
     381                 :          3 :         if (cgroup_is_threaded(cgrp))
     382                 :            :                 return false;
     383                 :            : 
     384                 :            :         /* a domain w/ threaded children is a thread root */
     385                 :          3 :         if (cgrp->nr_threaded_children)
     386                 :            :                 return true;
     387                 :            : 
     388                 :            :         /*
     389                 :            :          * A domain which has tasks and explicit threaded controllers
     390                 :            :          * enabled is a thread root.
     391                 :            :          */
     392                 :          3 :         if (cgroup_has_tasks(cgrp) &&
     393                 :          3 :             (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
     394                 :            :                 return true;
     395                 :            : 
     396                 :          0 :         return false;
     397                 :            : }
     398                 :            : 
      399                 :            : /* a domain which isn't connected to the root w/o breakage can't be used */
     400                 :          3 : static bool cgroup_is_valid_domain(struct cgroup *cgrp)
     401                 :            : {
     402                 :            :         /* the cgroup itself can be a thread root */
     403                 :          3 :         if (cgroup_is_threaded(cgrp))
     404                 :            :                 return false;
     405                 :            : 
     406                 :            :         /* but the ancestors can't be unless mixable */
     407                 :          3 :         while ((cgrp = cgroup_parent(cgrp))) {
     408                 :          3 :                 if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
     409                 :            :                         return false;
     410                 :          3 :                 if (cgroup_is_threaded(cgrp))
     411                 :            :                         return false;
     412                 :            :         }
     413                 :            : 
     414                 :            :         return true;
     415                 :            : }
     416                 :            : 
     417                 :            : /* subsystems visibly enabled on @cgrp: parent's subtree_control, or the root's filtered subsys_mask */
     418                 :          3 : static u16 cgroup_control(struct cgroup *cgrp)
     419                 :            : {
     420                 :            :         struct cgroup *parent = cgroup_parent(cgrp);
     421                 :          3 :         u16 root_ss_mask = cgrp->root->subsys_mask;
     422                 :            : 
     423                 :          3 :         if (parent) {
     424                 :          3 :                 u16 ss_mask = parent->subtree_control;
     425                 :            : 
     426                 :            :                 /* threaded cgroups can only have threaded controllers */
     427                 :          3 :                 if (cgroup_is_threaded(cgrp))
     428                 :          0 :                         ss_mask &= cgrp_dfl_threaded_ss_mask;
     429                 :          3 :                 return ss_mask;
     430                 :            :         }
     431                 :            : 
     432                 :          3 :         if (cgroup_on_dfl(cgrp)) /* no parent: when cgroup_on_dfl(), drop the inhibit and implicit sets */
     433                 :          3 :                 root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
     434                 :            :                                   cgrp_dfl_implicit_ss_mask);
     435                 :          3 :         return root_ss_mask;
     436                 :            : }
     437                 :            : 
     438                 :            : /* subsystems enabled on @cgrp: parent's subtree_ss_mask, or the root's whole subsys_mask */
     439                 :            : static u16 cgroup_ss_mask(struct cgroup *cgrp)
     440                 :            : {
     441                 :            :         struct cgroup *parent = cgroup_parent(cgrp);
     442                 :            : 
     443                 :          3 :         if (parent) {
     444                 :          3 :                 u16 ss_mask = parent->subtree_ss_mask;
     445                 :            : 
     446                 :            :                 /* threaded cgroups can only have threaded controllers */
     447                 :          3 :                 if (cgroup_is_threaded(cgrp))
     448                 :          0 :                         ss_mask &= cgrp_dfl_threaded_ss_mask;
     449                 :            :                 return ss_mask;
     450                 :            :         }
     451                 :            : 
     452                 :          3 :         return cgrp->root->subsys_mask; /* no parent: unlike cgroup_control(), nothing is masked out */
     453                 :            : }
     454                 :            : 
     455                 :            : /**
     456                 :            :  * cgroup_css - obtain a cgroup's css for the specified subsystem
     457                 :            :  * @cgrp: the cgroup of interest
     458                 :            :  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
     459                 :            :  *
     460                 :            :  * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
     461                 :            :  * function must be called either under cgroup_mutex or rcu_read_lock() and
     462                 :            :  * the caller is responsible for pinning the returned css if it wants to
     463                 :            :  * keep accessing it outside the said locks.  This function may return
     464                 :            :  * %NULL if @cgrp doesn't have @ss enabled.
     465                 :            :  */
     466                 :            : static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
     467                 :            :                                               struct cgroup_subsys *ss)
     468                 :            : {
     469                 :          3 :         if (ss)
     470                 :          3 :                 return rcu_dereference_check(cgrp->subsys[ss->id],
     471                 :            :                                         lockdep_is_held(&cgroup_mutex));
     472                 :            :         else
     473                 :          3 :                 return &cgrp->self; /* %NULL @ss: the cgroup's own css, never NULL */
     474                 :            : }
     475                 :            : 
     476                 :            : /**
     477                 :            :  * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
     478                 :            :  * @cgrp: the cgroup of interest
     479                 :            :  * @ss: the subsystem of interest
     480                 :            :  *
     481                 :            :  * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
     482                 :            :  * or is offline, %NULL is returned.
     483                 :            :  */
     484                 :          0 : static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
     485                 :            :                                                      struct cgroup_subsys *ss)
     486                 :            : {
     487                 :            :         struct cgroup_subsys_state *css;
     488                 :            : 
     489                 :            :         rcu_read_lock(); /* cgroup_css() needs cgroup_mutex or RCU; RCU is used here */
     490                 :            :         css = cgroup_css(cgrp, ss);
     491                 :          0 :         if (css && !css_tryget_online(css))
     492                 :            :                 css = NULL; /* css exists but css_tryget_online() failed */
     493                 :            :         rcu_read_unlock();
     494                 :            : 
     495                 :          0 :         return css;
     496                 :            : }
     497                 :            : 
     498                 :            : /**
     499                 :            :  * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
     500                 :            :  * @cgrp: the cgroup of interest
     501                 :            :  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
     502                 :            :  *
     503                 :            :  * Similar to cgroup_css() but returns the effective css, which is defined
     504                 :            :  * as the matching css of the nearest ancestor including self which has @ss
     505                 :            :  * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
     506                 :            :  * function is guaranteed to return non-NULL css.  Requires cgroup_mutex.
     507                 :            :  */
     508                 :          3 : static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
     509                 :            :                                                         struct cgroup_subsys *ss)
     510                 :            : {
     511                 :            :         lockdep_assert_held(&cgroup_mutex);
     512                 :            : 
     513                 :          3 :         if (!ss)
     514                 :          0 :                 return &cgrp->self;
     515                 :            : 
     516                 :            :         /*
     517                 :            :          * This function is used while updating css associations and thus
     518                 :            :          * can't test the csses directly.  Test ss_mask.
     519                 :            :          */
     520                 :          3 :         while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
     521                 :            :                 cgrp = cgroup_parent(cgrp);
     522                 :          0 :                 if (!cgrp)
     523                 :            :                         return NULL; /* ran out of ancestors: no cgroup up to the root has @ss in its ss_mask */
     524                 :            :         }
     525                 :            : 
     526                 :          3 :         return cgroup_css(cgrp, ss);
     527                 :            : }
     528                 :            : 
     529                 :            : /**
     530                 :            :  * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
     531                 :            :  * @cgrp: the cgroup of interest
     532                 :            :  * @ss: the subsystem of interest
     533                 :            :  *
     534                 :            :  * Find the effective css of @cgrp for @ss; no reference is taken.  The
     535                 :            :  * effective css is the matching css of the nearest ancestor including self
     536                 :            :  * which has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
     537                 :            :  * the root css is returned, so this function always returns a valid css.
     538                 :            :  *
     539                 :            :  * The returned css is not guaranteed to be online, and therefore it is the
     540                 :            :  * caller's responsibility to tryget a reference for it.
     541                 :            :  */
     542                 :          0 : struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
     543                 :            :                                          struct cgroup_subsys *ss)
     544                 :            : {
     545                 :            :         struct cgroup_subsys_state *css;
     546                 :            : 
     547                 :            :         do {
     548                 :            :                 css = cgroup_css(cgrp, ss);
     549                 :            : 
     550                 :          0 :                 if (css)
     551                 :          0 :                         return css;
     552                 :            :                 cgrp = cgroup_parent(cgrp);
     553                 :          0 :         } while (cgrp);
     554                 :            : 
     555                 :          0 :         return init_css_set.subsys[ss->id]; /* no ancestor has a css for @ss: fall back to init_css_set's */
     556                 :            : }
     557                 :            : 
     558                 :            : /**
     559                 :            :  * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
     560                 :            :  * @cgrp: the cgroup of interest
     561                 :            :  * @ss: the subsystem of interest
     562                 :            :  *
     563                 :            :  * Find and get the effective css of @cgrp for @ss.  The effective css is
     564                 :            :  * defined as the matching css of the nearest ancestor including self which
     565                 :            :  * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
     566                 :            :  * the root css is returned, so this function always returns a valid css.
     567                 :            :  * The returned css must be put using css_put().
     568                 :            :  */
     569                 :          0 : struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
     570                 :            :                                              struct cgroup_subsys *ss)
     571                 :            : {
     572                 :            :         struct cgroup_subsys_state *css;
     573                 :            : 
     574                 :            :         rcu_read_lock();
     575                 :            : 
     576                 :            :         do {
     577                 :            :                 css = cgroup_css(cgrp, ss);
     578                 :            : 
     579                 :          0 :                 if (css && css_tryget_online(css))
     580                 :            :                         goto out_unlock; /* css_tryget_online() succeeded: return this css */
     581                 :            :                 cgrp = cgroup_parent(cgrp); /* missing or not online: try the parent */
     582                 :          0 :         } while (cgrp);
     583                 :            : 
     584                 :          0 :         css = init_css_set.subsys[ss->id];
     585                 :            :         css_get(css); /* fallback css is pinned with an unconditional css_get() */
     586                 :            : out_unlock:
     587                 :            :         rcu_read_unlock();
     588                 :          0 :         return css;
     589                 :            : }
     590                 :            : 
     591                 :          3 : static void cgroup_get_live(struct cgroup *cgrp)
     592                 :            : {
     593                 :          3 :         WARN_ON_ONCE(cgroup_is_dead(cgrp)); /* warns on a dead cgroup, but the reference below is still taken */
     594                 :            :         css_get(&cgrp->self); /* the cgroup is pinned through its self css */
     595                 :          3 : }
     596                 :            : 
     597                 :            : /**
     598                 :            :  * __cgroup_task_count - count the number of tasks in a cgroup. The caller
     599                 :            :  * is responsible for taking the css_set_lock.
     600                 :            :  * @cgrp: the cgroup in question
     601                 :            :  */
     602                 :          0 : int __cgroup_task_count(const struct cgroup *cgrp)
     603                 :            : {
     604                 :            :         int count = 0;
     605                 :            :         struct cgrp_cset_link *link;
     606                 :            : 
     607                 :            :         lockdep_assert_held(&css_set_lock);
     608                 :            : 
     609                 :          0 :         list_for_each_entry(link, &cgrp->cset_links, cset_link)
     610                 :          0 :                 count += link->cset->nr_tasks; /* sum nr_tasks of every css_set linked to @cgrp */
     611                 :            : 
     612                 :          0 :         return count;
     613                 :            : }
     614                 :            : 
     615                 :            : /**
     616                 :            :  * cgroup_task_count - count the number of tasks in a cgroup.
     617                 :            :  * @cgrp: the cgroup in question
     618                 :            :  */
     619                 :          0 : int cgroup_task_count(const struct cgroup *cgrp)
     620                 :            : {
     621                 :            :         int count;
     622                 :            : 
     623                 :            :         spin_lock_irq(&css_set_lock); /* __cgroup_task_count() asserts that css_set_lock is held */
     624                 :            :         count = __cgroup_task_count(cgrp);
     625                 :            :         spin_unlock_irq(&css_set_lock);
     626                 :            : 
     627                 :          0 :         return count;
     628                 :            : }
     629                 :            : 
     630                 :          3 : struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
     631                 :            : {
     632                 :          3 :         struct cgroup *cgrp = of->kn->parent->priv;
     633                 :            :         struct cftype *cft = of_cft(of);
     634                 :            : 
     635                 :            :         /*
     636                 :            :          * This is an open-coded and unprotected implementation of cgroup_css().
     637                 :            :          * of_css() is only called from a kernfs file operation which has
     638                 :            :          * an active reference on the file.  Because all the subsystem
     639                 :            :          * files are drained before a css is disassociated with a cgroup,
     640                 :            :          * the matching css from the cgroup's subsys table is guaranteed to
     641                 :            :          * be and stay valid until the enclosing operation is complete.
     642                 :            :          */
     643                 :          3 :         if (cft->ss)
     644                 :          3 :                 return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
     645                 :            :         else
     646                 :          3 :                 return &cgrp->self; /* cftype without a subsystem: the cgroup's own css */
     647                 :            : }
     648                 :            : EXPORT_SYMBOL_GPL(of_css);
     649                 :            : 
     650                 :            : /**
     651                 :            :  * for_each_css - iterate all css's of a cgroup
     652                 :            :  * @css: the iteration cursor
     653                 :            :  * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
     654                 :            :  * @cgrp: the target cgroup to iterate css's of
     655                 :            :  *
     656                 :            :  * Should be called under cgroup_mutex.  Subsystems with a NULL css on @cgrp are skipped.
     657                 :            :  */
     658                 :            : #define for_each_css(css, ssid, cgrp)                                   \
     659                 :            :         for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)     \
     660                 :            :                 if (!((css) = rcu_dereference_check(                    \
     661                 :            :                                 (cgrp)->subsys[(ssid)],                      \
     662                 :            :                                 lockdep_is_held(&cgroup_mutex)))) { }       \
     663                 :            :                 else
     664                 :            : 
     665                 :            : /**
     666                 :            :  * for_each_e_css - iterate all effective css's of a cgroup
     667                 :            :  * @css: the iteration cursor
     668                 :            :  * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
     669                 :            :  * @cgrp: the target cgroup to iterate css's of
     670                 :            :  *
     671                 :            :  * Should be called under cgroup_mutex.  A NULL cgroup_e_css_by_mask() result is skipped.
     672                 :            :  */
     673                 :            : #define for_each_e_css(css, ssid, cgrp)                                     \
     674                 :            :         for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)         \
     675                 :            :                 if (!((css) = cgroup_e_css_by_mask(cgrp,                    \
     676                 :            :                                                    cgroup_subsys[(ssid)]))) \
     677                 :            :                         ;                                                   \
     678                 :            :                 else
     679                 :            : 
     680                 :            : /**
     681                 :            :  * do_each_subsys_mask - filter for_each_subsys with a bitmask
     682                 :            :  * @ss: the iteration cursor
     683                 :            :  * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
     684                 :            :  * @ss_mask: the bitmask
     685                 :            :  *
     686                 :            :  * The block will only run for cases where the ssid-th bit (1 << ssid) of
     687                 :            :  * @ss_mask is set.  The block must be closed with while_each_subsys_mask().
     688                 :            :  */
     689                 :            : #define do_each_subsys_mask(ss, ssid, ss_mask) do {                     \
     690                 :            :         unsigned long __ss_mask = (ss_mask);                            \
     691                 :            :         if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
     692                 :            :                 (ssid) = 0;                                             \
     693                 :            :                 break;                                                  \
     694                 :            :         }                                                               \
     695                 :            :         for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {   \
     696                 :            :                 (ss) = cgroup_subsys[ssid];                             \
     697                 :            :                 {
     698                 :            : 
     699                 :            : #define while_each_subsys_mask()                                        \
     700                 :            :                 }                                                       \
     701                 :            :         }                                                               \
     702                 :            : } while (false)
     703                 :            : 
     704                 :            : /* iterate over child cgrps, skipping dead ones; cgroup_mutex should be held throughout iteration */
     705                 :            : #define cgroup_for_each_live_child(child, cgrp)                         \
     706                 :            :         list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
     707                 :            :                 if (({ lockdep_assert_held(&cgroup_mutex);          \
     708                 :            :                        cgroup_is_dead(child); }))                       \
     709                 :            :                         ;                                               \
     710                 :            :                 else
     711                 :            : 
     712                 :            : /* walk live descendants in preorder under cgroup_mutex; @dsct is set from @d_css->cgroup, dead ones are skipped */
     713                 :            : #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)          \
     714                 :            :         css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))  \
     715                 :            :                 if (({ lockdep_assert_held(&cgroup_mutex);          \
     716                 :            :                        (dsct) = (d_css)->cgroup;                     \
     717                 :            :                        cgroup_is_dead(dsct); }))                        \
     718                 :            :                         ;                                               \
     719                 :            :                 else
     720                 :            : 
     721                 :            : /* walk live descendants in postorder under cgroup_mutex; @dsct is set from @d_css->cgroup, dead ones are skipped */
     722                 :            : #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)         \
     723                 :            :         css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
     724                 :            :                 if (({ lockdep_assert_held(&cgroup_mutex);          \
     725                 :            :                        (dsct) = (d_css)->cgroup;                     \
     726                 :            :                        cgroup_is_dead(dsct); }))                        \
     727                 :            :                         ;                                               \
     728                 :            :                 else
     729                 :            : 
     730                 :            : /*
     731                 :            :  * The default css_set - used by init and its children prior to any
     732                 :            :  * hierarchies being mounted. It contains a pointer to the root state
     733                 :            :  * for each subsystem. Also used to anchor the list of css_sets. Not
     734                 :            :  * reference-counted, to improve performance when child cgroups
     735                 :            :  * haven't been created.
     736                 :            :  */
     737                 :            : struct css_set init_css_set = {
     738                 :            :         .refcount               = REFCOUNT_INIT(1),
     739                 :            :         .dom_cset               = &init_css_set, /* points at itself, so css_set_threaded() is false */
     740                 :            :         .tasks                  = LIST_HEAD_INIT(init_css_set.tasks),
     741                 :            :         .mg_tasks               = LIST_HEAD_INIT(init_css_set.mg_tasks),
     742                 :            :         .dying_tasks            = LIST_HEAD_INIT(init_css_set.dying_tasks),
     743                 :            :         .task_iters             = LIST_HEAD_INIT(init_css_set.task_iters),
     744                 :            :         .threaded_csets         = LIST_HEAD_INIT(init_css_set.threaded_csets),
     745                 :            :         .cgrp_links             = LIST_HEAD_INIT(init_css_set.cgrp_links),
     746                 :            :         .mg_preload_node        = LIST_HEAD_INIT(init_css_set.mg_preload_node),
     747                 :            :         .mg_node                = LIST_HEAD_INIT(init_css_set.mg_node),
     748                 :            : 
     749                 :            :         /*
     750                 :            :          * The following field is re-initialized when this cset gets linked
     751                 :            :          * in cgroup_init().  However, let's initialize the field
     752                 :            :          * statically too so that the default cgroup can be accessed safely
     753                 :            :          * early during boot.
     754                 :            :          */
     755                 :            :         .dfl_cgrp               = &cgrp_dfl_root.cgrp,
     756                 :            : };
     757                 :            : 
     758                 :            : static int css_set_count        = 1;    /* starts at 1 to account for init_css_set */
     759                 :            : 
     760                 :            : static bool css_set_threaded(struct css_set *cset)
     761                 :            : {
     762                 :          3 :         return cset->dom_cset != cset; /* threaded iff ->dom_cset is some other css_set */
     763                 :            : }
     764                 :            : 
     765                 :            : /**
     766                 :            :  * css_set_populated - does a css_set contain any tasks?
     767                 :            :  * @cset: target css_set
     768                 :            :  *
     769                 :            :  * css_set_populated() should be the same as !!cset->nr_tasks at steady
     770                 :            :  * state. However, css_set_populated() can be called while a task is being
     771                 :            :  * added to or removed from the linked list before the nr_tasks is
     772                 :            :  * properly updated. Hence, we can't just look at ->nr_tasks here.
     773                 :            :  */
     774                 :            : static bool css_set_populated(struct css_set *cset)
     775                 :            : {
     776                 :            :         lockdep_assert_held(&css_set_lock);
     777                 :            : 
     778                 :          3 :         return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks); /* tests the lists themselves, not ->nr_tasks */
     779                 :            : }
     780                 :            : 
     781                 :            : /**
     782                 :            :  * cgroup_update_populated - update the populated count of a cgroup
     783                 :            :  * @cgrp: the target cgroup
     784                 :            :  * @populated: inc or dec populated count
     785                 :            :  *
     786                 :            :  * One of the css_sets associated with @cgrp is either getting its first
     787                 :            :  * task or losing the last.  Update @cgrp->nr_populated_* accordingly.  The
     788                 :            :  * count is propagated towards root so that a given cgroup's
     789                 :            :  * nr_populated_{domain,threaded}_children are zero iff none of its
     790                 :            :  * descendants contain any tasks.
     791                 :            :  *
     792                 :            :  * @cgrp's populated state (see cgroup_is_populated()) is zero if
     793                 :            :  * @cgrp->nr_populated_csets and the nr_populated_*_children counts are all
     794                 :            :  * zero and 1 otherwise.  When it changes from or to zero, userland is notified
     795                 :            :  * through @cgrp->events_file that the content of the interface file has changed.
     796                 :            :  * This can be used to detect when @cgrp and its descendants become populated or empty.
     797                 :            :  */
     798                 :          3 : static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
     799                 :            : {
     800                 :            :         struct cgroup *child = NULL;
     801                 :          3 :         int adj = populated ? 1 : -1;
     802                 :            : 
     803                 :            :         lockdep_assert_held(&css_set_lock);
     804                 :            : 
     805                 :            :         do {
     806                 :            :                 bool was_populated = cgroup_is_populated(cgrp);
     807                 :            : 
     808                 :          3 :                 if (!child) { /* first iteration: @cgrp's own css_set count changes */
     809                 :          3 :                         cgrp->nr_populated_csets += adj;
     810                 :            :                 } else { /* ancestor: account the child we just came from */
     811                 :          3 :                         if (cgroup_is_threaded(child))
     812                 :          0 :                                 cgrp->nr_populated_threaded_children += adj;
     813                 :            :                         else
     814                 :          3 :                                 cgrp->nr_populated_domain_children += adj;
     815                 :            :                 }
     816                 :            : 
     817                 :          3 :                 if (was_populated == cgroup_is_populated(cgrp))
     818                 :            :                         break; /* populated state unchanged: nothing to propagate further up */
     819                 :            : 
     820                 :          3 :                 cgroup1_check_for_release(cgrp);
     821                 :          3 :                 TRACE_CGROUP_PATH(notify_populated, cgrp,
     822                 :            :                                   cgroup_is_populated(cgrp));
     823                 :          3 :                 cgroup_file_notify(&cgrp->events_file);
     824                 :            : 
     825                 :            :                 child = cgrp;
     826                 :            :                 cgrp = cgroup_parent(cgrp);
     827                 :          3 :         } while (cgrp);
     828                 :          3 : }
     829                 :            : 
     830                 :            : /**
     831                 :            :  * css_set_update_populated - update populated state of a css_set
     832                 :            :  * @cset: target css_set
     833                 :            :  * @populated: whether @cset is populated or depopulated
     834                 :            :  *
     835                 :            :  * @cset is either getting the first task or losing the last.  Update the
     836                 :            :  * populated counters of all associated cgroups accordingly.
     837                 :            :  */
     838                 :          3 : static void css_set_update_populated(struct css_set *cset, bool populated)
     839                 :            : {
     840                 :            :         struct cgrp_cset_link *link;
     841                 :            : 
     842                 :            :         lockdep_assert_held(&css_set_lock);
     843                 :            : 
     844                 :          3 :         list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
     845                 :          3 :                 cgroup_update_populated(link->cgrp, populated); /* once per cgroup linked to @cset */
     846                 :          3 : }
     847                 :            : 
     848                 :            : /*
     849                 :            :  * @task is leaving, advance task iterators which are pointing to it so
     850                 :            :  * that they can resume at the next position.  Advancing an iterator might
     851                 :            :  * remove it from the list, use safe walk.  See css_task_iter_skip() for
     852                 :            :  * details.
     853                 :            :  */
     854                 :          3 : static void css_set_skip_task_iters(struct css_set *cset,
     855                 :            :                                     struct task_struct *task)
     856                 :            : {
     857                 :            :         struct css_task_iter *it, *pos;
     858                 :            : 
     859                 :          3 :         list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
     860                 :            :                 css_task_iter_skip(it, task); /* applied to every iterator on @cset->task_iters */
     861                 :          3 : }
     862                 :            : 
     863                 :            : /**
     864                 :            :  * css_set_move_task - move a task from one css_set to another
     865                 :            :  * @task: task being moved
     866                 :            :  * @from_cset: css_set @task currently belongs to (may be NULL)
     867                 :            :  * @to_cset: new css_set @task is being moved to (may be NULL)
     868                 :            :  * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
     869                 :            :  *
     870                 :            :  * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
     871                 :            :  * css_set, @from_cset can be NULL.  If @task is being disassociated
     872                 :            :  * instead of moved, @to_cset can be NULL.
     873                 :            :  *
     874                 :            :  * This function automatically handles populated counter updates and
     875                 :            :  * css_task_iter adjustments but the caller is responsible for managing
     876                 :            :  * @from_cset and @to_cset's reference counts.
     877                 :            :  */
     878                 :          3 : static void css_set_move_task(struct task_struct *task,
     879                 :            :                               struct css_set *from_cset, struct css_set *to_cset,
     880                 :            :                               bool use_mg_tasks)
     881                 :            : {
     882                 :            :         lockdep_assert_held(&css_set_lock);
     883                 :            : 
     884                 :          3 :         if (to_cset && !css_set_populated(to_cset))
     885                 :          3 :                 css_set_update_populated(to_cset, true);
     886                 :            : 
     887                 :          3 :         if (from_cset) {
     888                 :          3 :                 WARN_ON_ONCE(list_empty(&task->cg_list));
     889                 :            : 
     890                 :          3 :                 css_set_skip_task_iters(from_cset, task);
     891                 :            :                 list_del_init(&task->cg_list);
     892                 :          3 :                 if (!css_set_populated(from_cset))
     893                 :          3 :                         css_set_update_populated(from_cset, false);
     894                 :            :         } else {
     895                 :          3 :                 WARN_ON_ONCE(!list_empty(&task->cg_list));
     896                 :            :         }
     897                 :            : 
     898                 :          3 :         if (to_cset) {
     899                 :            :                 /*
     900                 :            :                  * We are synchronized through cgroup_threadgroup_rwsem
     901                 :            :                  * against PF_EXITING setting such that we can't race
     902                 :            :                  * against cgroup_exit() changing the css_set to
     903                 :            :                  * init_css_set and dropping the old one.
     904                 :            :                  */
     905                 :          3 :                 WARN_ON_ONCE(task->flags & PF_EXITING);
     906                 :            : 
     907                 :            :                 cgroup_move_task(task, to_cset);
     908                 :          3 :                 list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
     909                 :            :                                                              &to_cset->tasks);
     910                 :            :         }
     911                 :          3 : }
     912                 :            : 
     913                 :            : /*
     914                 :            :  * Hash table of css_sets.  This speeds up finding an existing css_set.
     915                 :            :  * The hash doesn't (currently) take into account cgroups in empty
     916                 :            :  * hierarchies.
     917                 :            :  */
     918                 :            : #define CSS_SET_HASH_BITS       7
     919                 :            : static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
     920                 :            : 
     921                 :            : static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
     922                 :            : {
     923                 :            :         unsigned long key = 0UL;
     924                 :            :         struct cgroup_subsys *ss;
     925                 :            :         int i;
     926                 :            : 
     927                 :          3 :         for_each_subsys(ss, i)
     928                 :          3 :                 key += (unsigned long)css[i];
     929                 :          3 :         key = (key >> 16) ^ key;
     930                 :            : 
     931                 :            :         return key;
     932                 :            : }
     933                 :            : 
     934                 :          3 : void put_css_set_locked(struct css_set *cset)
     935                 :            : {
     936                 :            :         struct cgrp_cset_link *link, *tmp_link;
     937                 :            :         struct cgroup_subsys *ss;
     938                 :            :         int ssid;
     939                 :            : 
     940                 :            :         lockdep_assert_held(&css_set_lock);
     941                 :            : 
     942                 :          3 :         if (!refcount_dec_and_test(&cset->refcount))
     943                 :          3 :                 return;
     944                 :            : 
     945                 :          3 :         WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
     946                 :            : 
     947                 :            :         /* This css_set is dead. unlink it and release cgroup and css refs */
     948                 :          3 :         for_each_subsys(ss, ssid) {
     949                 :            :                 list_del(&cset->e_cset_node[ssid]);
     950                 :          3 :                 css_put(cset->subsys[ssid]);
     951                 :            :         }
     952                 :            :         hash_del(&cset->hlist);
     953                 :          3 :         css_set_count--;
     954                 :            : 
     955                 :          3 :         list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
     956                 :            :                 list_del(&link->cset_link);
     957                 :            :                 list_del(&link->cgrp_link);
     958                 :          3 :                 if (cgroup_parent(link->cgrp))
     959                 :            :                         cgroup_put(link->cgrp);
     960                 :          3 :                 kfree(link);
     961                 :            :         }
     962                 :            : 
     963                 :          3 :         if (css_set_threaded(cset)) {
     964                 :            :                 list_del(&cset->threaded_csets_node);
     965                 :          0 :                 put_css_set_locked(cset->dom_cset);
     966                 :            :         }
     967                 :            : 
     968                 :          3 :         kfree_rcu(cset, rcu_head);
     969                 :            : }
     970                 :            : 
     971                 :            : /**
     972                 :            :  * compare_css_sets - helper function for find_existing_css_set().
     973                 :            :  * @cset: candidate css_set being tested
     974                 :            :  * @old_cset: existing css_set for a task
     975                 :            :  * @new_cgrp: cgroup that's being entered by the task
     976                 :            :  * @template: desired set of css pointers in css_set (pre-calculated)
     977                 :            :  *
     978                 :            :  * Returns true if "cset" matches "old_cset" except for the hierarchy
     979                 :            :  * which "new_cgrp" belongs to, for which it should match "new_cgrp".
     980                 :            :  */
     981                 :          3 : static bool compare_css_sets(struct css_set *cset,
     982                 :            :                              struct css_set *old_cset,
     983                 :            :                              struct cgroup *new_cgrp,
     984                 :            :                              struct cgroup_subsys_state *template[])
     985                 :            : {
     986                 :            :         struct cgroup *new_dfl_cgrp;
     987                 :            :         struct list_head *l1, *l2;
     988                 :            : 
     989                 :            :         /*
     990                 :            :          * On the default hierarchy, there can be csets which are
     991                 :            :          * associated with the same set of cgroups but different csses.
     992                 :            :          * Let's first ensure that csses match.
     993                 :            :          */
     994                 :          3 :         if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
     995                 :            :                 return false;
     996                 :            : 
     997                 :            : 
     998                 :            :         /* @cset's domain should match the default cgroup's */
     999                 :          3 :         if (cgroup_on_dfl(new_cgrp))
    1000                 :            :                 new_dfl_cgrp = new_cgrp;
    1001                 :            :         else
    1002                 :          3 :                 new_dfl_cgrp = old_cset->dfl_cgrp;
    1003                 :            : 
    1004                 :          3 :         if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
    1005                 :            :                 return false;
    1006                 :            : 
    1007                 :            :         /*
    1008                 :            :          * Compare cgroup pointers in order to distinguish between
    1009                 :            :          * different cgroups in hierarchies.  As different cgroups may
    1010                 :            :          * share the same effective css, this comparison is always
    1011                 :            :          * necessary.
    1012                 :            :          */
    1013                 :          3 :         l1 = &cset->cgrp_links;
    1014                 :          3 :         l2 = &old_cset->cgrp_links;
    1015                 :            :         while (1) {
    1016                 :            :                 struct cgrp_cset_link *link1, *link2;
    1017                 :            :                 struct cgroup *cgrp1, *cgrp2;
    1018                 :            : 
    1019                 :          3 :                 l1 = l1->next;
    1020                 :          3 :                 l2 = l2->next;
    1021                 :            :                 /* See if we reached the end - both lists are equal length. */
    1022                 :          3 :                 if (l1 == &cset->cgrp_links) {
    1023                 :          3 :                         BUG_ON(l2 != &old_cset->cgrp_links);
    1024                 :            :                         break;
    1025                 :            :                 } else {
    1026                 :          3 :                         BUG_ON(l2 == &old_cset->cgrp_links);
    1027                 :            :                 }
    1028                 :            :                 /* Locate the cgroups associated with these links. */
    1029                 :            :                 link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
    1030                 :            :                 link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
    1031                 :          3 :                 cgrp1 = link1->cgrp;
    1032                 :          3 :                 cgrp2 = link2->cgrp;
    1033                 :            :                 /* Hierarchies should be linked in the same order. */
    1034                 :          3 :                 BUG_ON(cgrp1->root != cgrp2->root);
    1035                 :            : 
    1036                 :            :                 /*
    1037                 :            :                  * If this hierarchy is the hierarchy of the cgroup
    1038                 :            :                  * that's changing, then we need to check that this
    1039                 :            :                  * css_set points to the new cgroup; if it's any other
    1040                 :            :                  * hierarchy, then this css_set should point to the
    1041                 :            :                  * same cgroup as the old css_set.
    1042                 :            :                  */
    1043                 :          3 :                 if (cgrp1->root == new_cgrp->root) {
    1044                 :          3 :                         if (cgrp1 != new_cgrp)
    1045                 :            :                                 return false;
    1046                 :            :                 } else {
    1047                 :          3 :                         if (cgrp1 != cgrp2)
    1048                 :            :                                 return false;
    1049                 :            :                 }
    1050                 :            :         }
    1051                 :            :         return true;
    1052                 :            : }
    1053                 :            : 
    1054                 :            : /**
    1055                 :            :  * find_existing_css_set - init css array and find the matching css_set
    1056                 :            :  * @old_cset: the css_set that we're using before the cgroup transition
    1057                 :            :  * @cgrp: the cgroup that we're moving into
    1058                 :            :  * @template: out param for the new set of csses, should be clear on entry
    1059                 :            :  */
    1060                 :          3 : static struct css_set *find_existing_css_set(struct css_set *old_cset,
    1061                 :            :                                         struct cgroup *cgrp,
    1062                 :            :                                         struct cgroup_subsys_state *template[])
    1063                 :            : {
    1064                 :          3 :         struct cgroup_root *root = cgrp->root;
    1065                 :            :         struct cgroup_subsys *ss;
    1066                 :            :         struct css_set *cset;
    1067                 :            :         unsigned long key;
    1068                 :            :         int i;
    1069                 :            : 
    1070                 :            :         /*
    1071                 :            :          * Build the set of subsystem state objects that we want to see in the
    1072                 :            :          * new css_set. while subsystems can change globally, the entries here
    1073                 :            :          * won't change, so no need for locking.
    1074                 :            :          */
    1075                 :          3 :         for_each_subsys(ss, i) {
    1076                 :          3 :                 if (root->subsys_mask & (1UL << i)) {
    1077                 :            :                         /*
    1078                 :            :                          * @ss is in this hierarchy, so we want the
    1079                 :            :                          * effective css from @cgrp.
    1080                 :            :                          */
    1081                 :          3 :                         template[i] = cgroup_e_css_by_mask(cgrp, ss);
    1082                 :            :                 } else {
    1083                 :            :                         /*
    1084                 :            :                          * @ss is not in this hierarchy, so we don't want
    1085                 :            :                          * to change the css.
    1086                 :            :                          */
    1087                 :          3 :                         template[i] = old_cset->subsys[i];
    1088                 :            :                 }
    1089                 :            :         }
    1090                 :            : 
    1091                 :            :         key = css_set_hash(template);
    1092                 :          3 :         hash_for_each_possible(css_set_table, cset, hlist, key) {
    1093                 :          3 :                 if (!compare_css_sets(cset, old_cset, cgrp, template))
    1094                 :          3 :                         continue;
    1095                 :            : 
    1096                 :            :                 /* This css_set matches what we need */
    1097                 :          3 :                 return cset;
    1098                 :            :         }
    1099                 :            : 
    1100                 :            :         /* No existing css_set matched */
    1101                 :            :         return NULL;
    1102                 :            : }
    1103                 :            : 
    1104                 :          3 : static void free_cgrp_cset_links(struct list_head *links_to_free)
    1105                 :            : {
    1106                 :            :         struct cgrp_cset_link *link, *tmp_link;
    1107                 :            : 
    1108                 :          3 :         list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
    1109                 :            :                 list_del(&link->cset_link);
    1110                 :          3 :                 kfree(link);
    1111                 :            :         }
    1112                 :          3 : }
    1113                 :            : 
    1114                 :            : /**
    1115                 :            :  * allocate_cgrp_cset_links - allocate cgrp_cset_links
    1116                 :            :  * @count: the number of links to allocate
    1117                 :            :  * @tmp_links: list_head the allocated links are put on
    1118                 :            :  *
    1119                 :            :  * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
    1120                 :            :  * through ->cset_link.  Returns 0 on success or -errno.
    1121                 :            :  */
    1122                 :          3 : static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
    1123                 :            : {
    1124                 :            :         struct cgrp_cset_link *link;
    1125                 :            :         int i;
    1126                 :            : 
    1127                 :            :         INIT_LIST_HEAD(tmp_links);
    1128                 :            : 
    1129                 :          3 :         for (i = 0; i < count; i++) {
    1130                 :          3 :                 link = kzalloc(sizeof(*link), GFP_KERNEL);
    1131                 :          3 :                 if (!link) {
    1132                 :          0 :                         free_cgrp_cset_links(tmp_links);
    1133                 :          0 :                         return -ENOMEM;
    1134                 :            :                 }
    1135                 :          3 :                 list_add(&link->cset_link, tmp_links);
    1136                 :            :         }
    1137                 :            :         return 0;
    1138                 :            : }
    1139                 :            : 
    1140                 :            : /**
    1141                 :            :  * link_css_set - a helper function to link a css_set to a cgroup
    1142                 :            :  * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
    1143                 :            :  * @cset: the css_set to be linked
    1144                 :            :  * @cgrp: the destination cgroup
    1145                 :            :  */
    1146                 :          3 : static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
    1147                 :            :                          struct cgroup *cgrp)
    1148                 :            : {
    1149                 :            :         struct cgrp_cset_link *link;
    1150                 :            : 
    1151                 :          3 :         BUG_ON(list_empty(tmp_links));
    1152                 :            : 
    1153                 :          3 :         if (cgroup_on_dfl(cgrp))
    1154                 :          3 :                 cset->dfl_cgrp = cgrp;
    1155                 :            : 
    1156                 :          3 :         link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
    1157                 :          3 :         link->cset = cset;
    1158                 :          3 :         link->cgrp = cgrp;
    1159                 :            : 
    1160                 :            :         /*
    1161                 :            :          * Always add links to the tail of the lists so that the lists are
    1162                 :            :          * in chronological order.
    1163                 :            :          */
    1164                 :          3 :         list_move_tail(&link->cset_link, &cgrp->cset_links);
    1165                 :          3 :         list_add_tail(&link->cgrp_link, &cset->cgrp_links);
    1166                 :            : 
    1167                 :          3 :         if (cgroup_parent(cgrp))
    1168                 :          3 :                 cgroup_get_live(cgrp);
    1169                 :          3 : }
    1170                 :            : 
    1171                 :            : /**
    1172                 :            :  * find_css_set - return a new css_set with one cgroup updated
    1173                 :            :  * @old_cset: the baseline css_set
    1174                 :            :  * @cgrp: the cgroup to be updated
    1175                 :            :  *
    1176                 :            :  * Return a new css_set that's equivalent to @old_cset, but with @cgrp
    1177                 :            :  * substituted into the appropriate hierarchy.
    1178                 :            :  */
    1179                 :          3 : static struct css_set *find_css_set(struct css_set *old_cset,
    1180                 :            :                                     struct cgroup *cgrp)
    1181                 :            : {
    1182                 :          3 :         struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
    1183                 :            :         struct css_set *cset;
    1184                 :            :         struct list_head tmp_links;
    1185                 :            :         struct cgrp_cset_link *link;
    1186                 :            :         struct cgroup_subsys *ss;
    1187                 :            :         unsigned long key;
    1188                 :            :         int ssid;
    1189                 :            : 
    1190                 :            :         lockdep_assert_held(&cgroup_mutex);
    1191                 :            : 
    1192                 :            :         /* First see if we already have a css_set that matches
    1193                 :            :          * the desired set of subsystem states */
    1194                 :            :         spin_lock_irq(&css_set_lock);
    1195                 :          3 :         cset = find_existing_css_set(old_cset, cgrp, template);
    1196                 :          3 :         if (cset)
    1197                 :            :                 get_css_set(cset);
    1198                 :            :         spin_unlock_irq(&css_set_lock);
    1199                 :            : 
    1200                 :          3 :         if (cset)
    1201                 :            :                 return cset;
    1202                 :            : 
    1203                 :          3 :         cset = kzalloc(sizeof(*cset), GFP_KERNEL);
    1204                 :          3 :         if (!cset)
    1205                 :            :                 return NULL;
    1206                 :            : 
    1207                 :            :         /* Allocate all the cgrp_cset_link objects that we'll need */
    1208                 :          3 :         if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
    1209                 :          0 :                 kfree(cset);
    1210                 :          0 :                 return NULL;
    1211                 :            :         }
    1212                 :            : 
    1213                 :            :         refcount_set(&cset->refcount, 1);
    1214                 :          3 :         cset->dom_cset = cset;
    1215                 :          3 :         INIT_LIST_HEAD(&cset->tasks);
    1216                 :          3 :         INIT_LIST_HEAD(&cset->mg_tasks);
    1217                 :          3 :         INIT_LIST_HEAD(&cset->dying_tasks);
    1218                 :          3 :         INIT_LIST_HEAD(&cset->task_iters);
    1219                 :          3 :         INIT_LIST_HEAD(&cset->threaded_csets);
    1220                 :            :         INIT_HLIST_NODE(&cset->hlist);
    1221                 :          3 :         INIT_LIST_HEAD(&cset->cgrp_links);
    1222                 :          3 :         INIT_LIST_HEAD(&cset->mg_preload_node);
    1223                 :          3 :         INIT_LIST_HEAD(&cset->mg_node);
    1224                 :            : 
    1225                 :            :         /* Copy the set of subsystem state objects generated in
    1226                 :            :          * find_existing_css_set() */
    1227                 :          3 :         memcpy(cset->subsys, template, sizeof(cset->subsys));
    1228                 :            : 
    1229                 :            :         spin_lock_irq(&css_set_lock);
    1230                 :            :         /* Add reference counts and links from the new css_set. */
    1231                 :          3 :         list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
    1232                 :          3 :                 struct cgroup *c = link->cgrp;
    1233                 :            : 
    1234                 :          3 :                 if (c->root == cgrp->root)
    1235                 :            :                         c = cgrp;
    1236                 :          3 :                 link_css_set(&tmp_links, cset, c);
    1237                 :            :         }
    1238                 :            : 
    1239                 :          3 :         BUG_ON(!list_empty(&tmp_links));
    1240                 :            : 
    1241                 :          3 :         css_set_count++;
    1242                 :            : 
    1243                 :            :         /* Add @cset to the hash table */
    1244                 :            :         key = css_set_hash(cset->subsys);
    1245                 :          3 :         hash_add(css_set_table, &cset->hlist, key);
    1246                 :            : 
    1247                 :          3 :         for_each_subsys(ss, ssid) {
    1248                 :          3 :                 struct cgroup_subsys_state *css = cset->subsys[ssid];
    1249                 :            : 
    1250                 :          3 :                 list_add_tail(&cset->e_cset_node[ssid],
    1251                 :          3 :                               &css->cgroup->e_csets[ssid]);
    1252                 :            :                 css_get(css);
    1253                 :            :         }
    1254                 :            : 
    1255                 :            :         spin_unlock_irq(&css_set_lock);
    1256                 :            : 
    1257                 :            :         /*
    1258                 :            :          * If @cset should be threaded, look up the matching dom_cset and
    1259                 :            :          * link them up.  We first fully initialize @cset then look for the
    1260                 :            :          * dom_cset.  It's simpler this way and safe as @cset is guaranteed
    1261                 :            :          * to stay empty until we return.
    1262                 :            :          */
    1263                 :          3 :         if (cgroup_is_threaded(cset->dfl_cgrp)) {
    1264                 :            :                 struct css_set *dcset;
    1265                 :            : 
    1266                 :          0 :                 dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
    1267                 :          0 :                 if (!dcset) {
    1268                 :          0 :                         put_css_set(cset);
    1269                 :          0 :                         return NULL;
    1270                 :            :                 }
    1271                 :            : 
    1272                 :            :                 spin_lock_irq(&css_set_lock);
    1273                 :          0 :                 cset->dom_cset = dcset;
    1274                 :          0 :                 list_add_tail(&cset->threaded_csets_node,
    1275                 :            :                               &dcset->threaded_csets);
    1276                 :            :                 spin_unlock_irq(&css_set_lock);
    1277                 :            :         }
    1278                 :            : 
    1279                 :          3 :         return cset;
    1280                 :            : }
    1281                 :            : 
    1282                 :          3 : struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
    1283                 :            : {
    1284                 :          3 :         struct cgroup *root_cgrp = kf_root->kn->priv;
    1285                 :            : 
    1286                 :          3 :         return root_cgrp->root;
    1287                 :            : }
    1288                 :            : 
    1289                 :          3 : static int cgroup_init_root_id(struct cgroup_root *root)
    1290                 :            : {
    1291                 :            :         int id;
    1292                 :            : 
    1293                 :            :         lockdep_assert_held(&cgroup_mutex);
    1294                 :            : 
    1295                 :          3 :         id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
    1296                 :          3 :         if (id < 0)
    1297                 :            :                 return id;
    1298                 :            : 
    1299                 :          3 :         root->hierarchy_id = id;
    1300                 :          3 :         return 0;
    1301                 :            : }
    1302                 :            : 
    1303                 :            : static void cgroup_exit_root_id(struct cgroup_root *root)
    1304                 :            : {
    1305                 :            :         lockdep_assert_held(&cgroup_mutex);
    1306                 :            : 
    1307                 :          0 :         idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
    1308                 :            : }
    1309                 :            : 
    1310                 :          0 : void cgroup_free_root(struct cgroup_root *root)
    1311                 :            : {
    1312                 :          0 :         if (root) {
    1313                 :          0 :                 idr_destroy(&root->cgroup_idr);
    1314                 :          0 :                 kfree(root);
    1315                 :            :         }
    1316                 :          0 : }
    1317                 :            : 
    1318                 :          0 : static void cgroup_destroy_root(struct cgroup_root *root)
    1319                 :            : {
    1320                 :            :         struct cgroup *cgrp = &root->cgrp;
    1321                 :            :         struct cgrp_cset_link *link, *tmp_link;
    1322                 :            : 
    1323                 :          0 :         trace_cgroup_destroy_root(root);
    1324                 :            : 
    1325                 :          0 :         cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
    1326                 :            : 
    1327                 :          0 :         BUG_ON(atomic_read(&root->nr_cgrps));
    1328                 :          0 :         BUG_ON(!list_empty(&cgrp->self.children));
    1329                 :            : 
    1330                 :            :         /* Rebind all subsystems back to the default hierarchy */
    1331                 :          0 :         WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
    1332                 :            : 
    1333                 :            :         /*
    1334                 :            :          * Release all the links from cset_links to this hierarchy's
    1335                 :            :          * root cgroup
    1336                 :            :          */
    1337                 :            :         spin_lock_irq(&css_set_lock);
    1338                 :            : 
    1339                 :          0 :         list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
    1340                 :            :                 list_del(&link->cset_link);
    1341                 :            :                 list_del(&link->cgrp_link);
    1342                 :          0 :                 kfree(link);
    1343                 :            :         }
    1344                 :            : 
    1345                 :            :         spin_unlock_irq(&css_set_lock);
    1346                 :            : 
    1347                 :          0 :         if (!list_empty(&root->root_list)) {
    1348                 :            :                 list_del(&root->root_list);
    1349                 :          0 :                 cgroup_root_count--;
    1350                 :            :         }
    1351                 :            : 
    1352                 :            :         cgroup_exit_root_id(root);
    1353                 :            : 
    1354                 :          0 :         mutex_unlock(&cgroup_mutex);
    1355                 :            : 
    1356                 :          0 :         kernfs_destroy_root(root->kf_root);
    1357                 :          0 :         cgroup_free_root(root);
    1358                 :          0 : }
    1359                 :            : 
    1360                 :            : /*
    1361                 :            :  * look up cgroup associated with current task's cgroup namespace on the
    1362                 :            :  * specified hierarchy
    1363                 :            :  */
    1364                 :            : static struct cgroup *
    1365                 :          3 : current_cgns_cgroup_from_root(struct cgroup_root *root)
    1366                 :            : {
    1367                 :            :         struct cgroup *res = NULL;
    1368                 :            :         struct css_set *cset;
    1369                 :            : 
    1370                 :            :         lockdep_assert_held(&css_set_lock);
    1371                 :            : 
    1372                 :            :         rcu_read_lock();
    1373                 :            : 
    1374                 :          3 :         cset = current->nsproxy->cgroup_ns->root_cset;
    1375                 :          3 :         if (cset == &init_css_set) {
    1376                 :          3 :                 res = &root->cgrp;
    1377                 :            :         } else {
    1378                 :            :                 struct cgrp_cset_link *link;
    1379                 :            : 
    1380                 :          0 :                 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
    1381                 :          0 :                         struct cgroup *c = link->cgrp;
    1382                 :            : 
    1383                 :          0 :                         if (c->root == root) {
    1384                 :          0 :                                 res = c;
    1385                 :          0 :                                 break;
    1386                 :            :                         }
    1387                 :            :                 }
    1388                 :            :         }
    1389                 :            :         rcu_read_unlock();
    1390                 :            : 
    1391                 :          3 :         BUG_ON(!res);
    1392                 :          3 :         return res;
    1393                 :            : }
    1394                 :            : 
    1395                 :            : /* look up cgroup associated with given css_set on the specified hierarchy */
    1396                 :          3 : static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
    1397                 :            :                                             struct cgroup_root *root)
    1398                 :            : {
    1399                 :            :         struct cgroup *res = NULL;
    1400                 :            : 
    1401                 :            :         lockdep_assert_held(&cgroup_mutex);
    1402                 :            :         lockdep_assert_held(&css_set_lock);
    1403                 :            : 
    1404                 :          3 :         if (cset == &init_css_set) {
    1405                 :          3 :                 res = &root->cgrp;
    1406                 :          3 :         } else if (root == &cgrp_dfl_root) {
    1407                 :          3 :                 res = cset->dfl_cgrp;
    1408                 :            :         } else {
    1409                 :            :                 struct cgrp_cset_link *link;
    1410                 :            : 
    1411                 :          3 :                 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
    1412                 :          3 :                         struct cgroup *c = link->cgrp;
    1413                 :            : 
    1414                 :          3 :                         if (c->root == root) {
    1415                 :          3 :                                 res = c;
    1416                 :          3 :                                 break;
    1417                 :            :                         }
    1418                 :            :                 }
    1419                 :            :         }
    1420                 :            : 
    1421                 :          3 :         BUG_ON(!res);
    1422                 :          3 :         return res;
    1423                 :            : }
    1424                 :            : 
    1425                 :            : /*
    1426                 :            :  * Return the cgroup for "task" from the given hierarchy. Must be
    1427                 :            :  * called with cgroup_mutex and css_set_lock held.
    1428                 :            :  */
    1429                 :          0 : struct cgroup *task_cgroup_from_root(struct task_struct *task,
    1430                 :            :                                      struct cgroup_root *root)
    1431                 :            : {
    1432                 :            :         /*
    1433                 :            :          * No need to lock the task - since we hold cgroup_mutex the
    1434                 :            :          * task can't change groups, so the only thing that can happen
    1435                 :            :          * is that it exits and its css is set back to init_css_set.
    1436                 :            :          */
    1437                 :          3 :         return cset_cgroup_from_root(task_css_set(task), root);
    1438                 :            : }
    1439                 :            : 
    1440                 :            : /*
    1441                 :            :  * A task must hold cgroup_mutex to modify cgroups.
    1442                 :            :  *
    1443                 :            :  * Any task can increment and decrement the count field without lock.
    1444                 :            :  * So in general, code holding cgroup_mutex can't rely on the count
    1445                 :            :  * field not changing.  However, if the count goes to zero, then only
    1446                 :            :  * cgroup_attach_task() can increment it again.  Because a count of zero
    1447                 :            :  * means that no tasks are currently attached, therefore there is no
    1448                 :            :  * way a task attached to that cgroup can fork (the other way to
    1449                 :            :  * increment the count).  So code holding cgroup_mutex can safely
    1450                 :            :  * assume that if the count is zero, it will stay zero. Similarly, if
    1451                 :            :  * a task holds cgroup_mutex on a cgroup with zero count, it
    1452                 :            :  * knows that the cgroup won't be removed, as cgroup_rmdir()
    1453                 :            :  * needs that mutex.
    1454                 :            :  *
    1455                 :            :  * A cgroup can only be deleted if both its 'count' of using tasks
    1456                 :            :  * is zero, and its list of 'children' cgroups is empty.  Since all
    1457                 :            :  * tasks in the system use _some_ cgroup, and since there is always at
    1458                 :            :  * least one task in the system (init, pid == 1), therefore, root cgroup
    1459                 :            :  * always has either children cgroups and/or using tasks.  So we don't
    1460                 :            :  * need a special hack to ensure that root cgroup cannot be deleted.
    1461                 :            :  *
    1462                 :            :  * P.S.  One more locking exception.  RCU is used to guard the
    1463                 :            :  * update of a tasks cgroup pointer by cgroup_attach_task()
    1464                 :            :  */
    1465                 :            : 
    1466                 :            : static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
    1467                 :            : 
    1468                 :          3 : static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
    1469                 :            :                               char *buf)
    1470                 :            : {
    1471                 :          3 :         struct cgroup_subsys *ss = cft->ss;
    1472                 :            : 
    1473                 :          3 :         if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
    1474                 :          3 :             !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
    1475                 :          3 :                 const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";
    1476                 :            : 
    1477                 :          3 :                 snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
    1478                 :            :                          dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
    1479                 :          3 :                          cft->name);
    1480                 :            :         } else {
    1481                 :          3 :                 strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
    1482                 :            :         }
    1483                 :          3 :         return buf;
    1484                 :            : }
    1485                 :            : 
    1486                 :            : /**
    1487                 :            :  * cgroup_file_mode - deduce file mode of a control file
    1488                 :            :  * @cft: the control file in question
    1489                 :            :  *
    1490                 :            :  * S_IRUGO for read, S_IWUSR for write.
    1491                 :            :  */
    1492                 :          3 : static umode_t cgroup_file_mode(const struct cftype *cft)
    1493                 :            : {
    1494                 :            :         umode_t mode = 0;
    1495                 :            : 
    1496                 :          3 :         if (cft->read_u64 || cft->read_s64 || cft->seq_show)
    1497                 :            :                 mode |= S_IRUGO;
    1498                 :            : 
    1499                 :          3 :         if (cft->write_u64 || cft->write_s64 || cft->write) {
    1500                 :          3 :                 if (cft->flags & CFTYPE_WORLD_WRITABLE)
    1501                 :          0 :                         mode |= S_IWUGO;
    1502                 :            :                 else
    1503                 :          3 :                         mode |= S_IWUSR;
    1504                 :            :         }
    1505                 :            : 
    1506                 :          3 :         return mode;
    1507                 :            : }
    1508                 :            : 
    1509                 :            : /**
    1510                 :            :  * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
    1511                 :            :  * @subtree_control: the new subtree_control mask to consider
    1512                 :            :  * @this_ss_mask: available subsystems
    1513                 :            :  *
    1514                 :            :  * On the default hierarchy, a subsystem may request other subsystems to be
    1515                 :            :  * enabled together through its ->depends_on mask.  In such cases, more
    1516                 :            :  * subsystems than specified in "cgroup.subtree_control" may be enabled.
    1517                 :            :  *
    1518                 :            :  * This function calculates which subsystems need to be enabled if
    1519                 :            :  * @subtree_control is to be applied while restricted to @this_ss_mask.
    1520                 :            :  */
    1521                 :          3 : static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
    1522                 :            : {
    1523                 :            :         u16 cur_ss_mask = subtree_control;
    1524                 :            :         struct cgroup_subsys *ss;
    1525                 :            :         int ssid;
    1526                 :            : 
    1527                 :            :         lockdep_assert_held(&cgroup_mutex);
    1528                 :            : 
    1529                 :          3 :         cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
    1530                 :            : 
    1531                 :            :         while (true) {
    1532                 :            :                 u16 new_ss_mask = cur_ss_mask;
    1533                 :            : 
    1534                 :          3 :                 do_each_subsys_mask(ss, ssid, cur_ss_mask) {
    1535                 :          3 :                         new_ss_mask |= ss->depends_on;
    1536                 :            :                 } while_each_subsys_mask();
    1537                 :            : 
    1538                 :            :                 /*
    1539                 :            :                  * Mask out subsystems which aren't available.  This can
    1540                 :            :                  * happen only if some depended-upon subsystems were bound
    1541                 :            :                  * to non-default hierarchies.
    1542                 :            :                  */
    1543                 :          3 :                 new_ss_mask &= this_ss_mask;
    1544                 :            : 
    1545                 :          3 :                 if (new_ss_mask == cur_ss_mask)
    1546                 :            :                         break;
    1547                 :            :                 cur_ss_mask = new_ss_mask;
    1548                 :            :         }
    1549                 :            : 
    1550                 :          3 :         return cur_ss_mask;
    1551                 :            : }
    1552                 :            : 
    1553                 :            : /**
    1554                 :            :  * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
    1555                 :            :  * @kn: the kernfs_node being serviced
    1556                 :            :  *
    1557                 :            :  * This helper undoes cgroup_kn_lock_live() and should be invoked before
    1558                 :            :  * the method finishes if locking succeeded.  Note that once this function
    1559                 :            :  * returns the cgroup returned by cgroup_kn_lock_live() may become
    1560                 :            :  * inaccessible any time.  If the caller intends to continue to access the
    1561                 :            :  * cgroup, it should pin it before invoking this function.
    1562                 :            :  */
    1563                 :          3 : void cgroup_kn_unlock(struct kernfs_node *kn)
    1564                 :            : {
    1565                 :            :         struct cgroup *cgrp;
    1566                 :            : 
    1567                 :          3 :         if (kernfs_type(kn) == KERNFS_DIR)
    1568                 :          3 :                 cgrp = kn->priv;
    1569                 :            :         else
    1570                 :          3 :                 cgrp = kn->parent->priv;
    1571                 :            : 
    1572                 :          3 :         mutex_unlock(&cgroup_mutex);
    1573                 :            : 
    1574                 :          3 :         kernfs_unbreak_active_protection(kn);
    1575                 :            :         cgroup_put(cgrp);
    1576                 :          3 : }
    1577                 :            : 
    1578                 :            : /**
    1579                 :            :  * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
    1580                 :            :  * @kn: the kernfs_node being serviced
    1581                 :            :  * @drain_offline: perform offline draining on the cgroup
    1582                 :            :  *
    1583                 :            :  * This helper is to be used by a cgroup kernfs method currently servicing
    1584                 :            :  * @kn.  It breaks the active protection, performs cgroup locking and
    1585                 :            :  * verifies that the associated cgroup is alive.  Returns the cgroup if
    1586                 :            :  * alive; otherwise, %NULL.  A successful return should be undone by a
    1587                 :            :  * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
    1588                 :            :  * cgroup is drained of offlining csses before return.
    1589                 :            :  *
    1590                 :            :  * Any cgroup kernfs method implementation which requires locking the
    1591                 :            :  * associated cgroup should use this helper.  It avoids nesting cgroup
    1592                 :            :  * locking under kernfs active protection and allows all kernfs operations
    1593                 :            :  * including self-removal.
    1594                 :            :  */
    1595                 :          3 : struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
    1596                 :            : {
    1597                 :            :         struct cgroup *cgrp;
    1598                 :            : 
    1599                 :          3 :         if (kernfs_type(kn) == KERNFS_DIR)
    1600                 :          3 :                 cgrp = kn->priv;
    1601                 :            :         else
    1602                 :          3 :                 cgrp = kn->parent->priv;
    1603                 :            : 
    1604                 :            :         /*
    1605                 :            :          * We're gonna grab cgroup_mutex which nests outside kernfs
    1606                 :            :          * active_ref.  cgroup liveliness check alone provides enough
    1607                 :            :          * protection against removal.  Ensure @cgrp stays accessible and
    1608                 :            :          * break the active_ref protection.
    1609                 :            :          */
    1610                 :          3 :         if (!cgroup_tryget(cgrp))
    1611                 :            :                 return NULL;
    1612                 :          3 :         kernfs_break_active_protection(kn);
    1613                 :            : 
    1614                 :          3 :         if (drain_offline)
    1615                 :          0 :                 cgroup_lock_and_drain_offline(cgrp);
    1616                 :            :         else
    1617                 :          3 :                 mutex_lock(&cgroup_mutex);
    1618                 :            : 
    1619                 :          3 :         if (!cgroup_is_dead(cgrp))
    1620                 :            :                 return cgrp;
    1621                 :            : 
    1622                 :          0 :         cgroup_kn_unlock(kn);
    1623                 :          0 :         return NULL;
    1624                 :            : }
    1625                 :            : 
    1626                 :          3 : static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
    1627                 :            : {
    1628                 :            :         char name[CGROUP_FILE_NAME_MAX];
    1629                 :            : 
    1630                 :            :         lockdep_assert_held(&cgroup_mutex);
    1631                 :            : 
    1632                 :          3 :         if (cft->file_offset) {
    1633                 :          3 :                 struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
    1634                 :          3 :                 struct cgroup_file *cfile = (void *)css + cft->file_offset;
    1635                 :            : 
    1636                 :            :                 spin_lock_irq(&cgroup_file_kn_lock);
    1637                 :          3 :                 cfile->kn = NULL;
    1638                 :            :                 spin_unlock_irq(&cgroup_file_kn_lock);
    1639                 :            : 
    1640                 :          3 :                 del_timer_sync(&cfile->notify_timer);
    1641                 :            :         }
    1642                 :            : 
    1643                 :          3 :         kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
    1644                 :          3 : }
    1645                 :            : 
    1646                 :            : /**
    1647                 :            :  * css_clear_dir - remove subsys files in a cgroup directory
    1648                 :            :  * @css: taget css
    1649                 :            :  */
    1650                 :          3 : static void css_clear_dir(struct cgroup_subsys_state *css)
    1651                 :            : {
    1652                 :          3 :         struct cgroup *cgrp = css->cgroup;
    1653                 :            :         struct cftype *cfts;
    1654                 :            : 
    1655                 :          3 :         if (!(css->flags & CSS_VISIBLE))
    1656                 :          3 :                 return;
    1657                 :            : 
    1658                 :          3 :         css->flags &= ~CSS_VISIBLE;
    1659                 :            : 
    1660                 :          3 :         if (!css->ss) {
    1661                 :          3 :                 if (cgroup_on_dfl(cgrp))
    1662                 :            :                         cfts = cgroup_base_files;
    1663                 :            :                 else
    1664                 :            :                         cfts = cgroup1_base_files;
    1665                 :            : 
    1666                 :          3 :                 cgroup_addrm_files(css, cgrp, cfts, false);
    1667                 :            :         } else {
    1668                 :          3 :                 list_for_each_entry(cfts, &css->ss->cfts, node)
    1669                 :          3 :                         cgroup_addrm_files(css, cgrp, cfts, false);
    1670                 :            :         }
    1671                 :            : }
    1672                 :            : 
    1673                 :            : /**
    1674                 :            :  * css_populate_dir - create subsys files in a cgroup directory
    1675                 :            :  * @css: target css
    1676                 :            :  *
    1677                 :            :  * On failure, no file is added.
    1678                 :            :  */
    1679                 :          3 : static int css_populate_dir(struct cgroup_subsys_state *css)
    1680                 :            : {
    1681                 :          3 :         struct cgroup *cgrp = css->cgroup;
    1682                 :            :         struct cftype *cfts, *failed_cfts;
    1683                 :            :         int ret;
    1684                 :            : 
    1685                 :          3 :         if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
    1686                 :            :                 return 0;
    1687                 :            : 
    1688                 :          3 :         if (!css->ss) {
    1689                 :          3 :                 if (cgroup_on_dfl(cgrp))
    1690                 :            :                         cfts = cgroup_base_files;
    1691                 :            :                 else
    1692                 :            :                         cfts = cgroup1_base_files;
    1693                 :            : 
    1694                 :          3 :                 ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
    1695                 :          3 :                 if (ret < 0)
    1696                 :            :                         return ret;
    1697                 :            :         } else {
    1698                 :          3 :                 list_for_each_entry(cfts, &css->ss->cfts, node) {
    1699                 :          3 :                         ret = cgroup_addrm_files(css, cgrp, cfts, true);
    1700                 :          3 :                         if (ret < 0) {
    1701                 :          0 :                                 failed_cfts = cfts;
    1702                 :            :                                 goto err;
    1703                 :            :                         }
    1704                 :            :                 }
    1705                 :            :         }
    1706                 :            : 
    1707                 :          3 :         css->flags |= CSS_VISIBLE;
    1708                 :            : 
    1709                 :          3 :         return 0;
    1710                 :            : err:
    1711                 :          0 :         list_for_each_entry(cfts, &css->ss->cfts, node) {
    1712                 :          0 :                 if (cfts == failed_cfts)
    1713                 :            :                         break;
    1714                 :          0 :                 cgroup_addrm_files(css, cgrp, cfts, false);
    1715                 :            :         }
    1716                 :          0 :         return ret;
    1717                 :            : }
    1718                 :            : 
    1719                 :          3 : int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
    1720                 :            : {
    1721                 :          3 :         struct cgroup *dcgrp = &dst_root->cgrp;
    1722                 :            :         struct cgroup_subsys *ss;
    1723                 :            :         int ssid, i, ret;
    1724                 :            : 
    1725                 :            :         lockdep_assert_held(&cgroup_mutex);
    1726                 :            : 
    1727                 :          3 :         do_each_subsys_mask(ss, ssid, ss_mask) {
    1728                 :            :                 /*
    1729                 :            :                  * If @ss has non-root csses attached to it, can't move.
    1730                 :            :                  * If @ss is an implicit controller, it is exempt from this
    1731                 :            :                  * rule and can be stolen.
    1732                 :            :                  */
    1733                 :          3 :                 if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
    1734                 :          0 :                     !ss->implicit_on_dfl)
    1735                 :          0 :                         return -EBUSY;
    1736                 :            : 
    1737                 :            :                 /* can't move between two non-dummy roots either */
    1738                 :          3 :                 if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
    1739                 :            :                         return -EBUSY;
    1740                 :            :         } while_each_subsys_mask();
    1741                 :            : 
    1742                 :          3 :         do_each_subsys_mask(ss, ssid, ss_mask) {
    1743                 :          3 :                 struct cgroup_root *src_root = ss->root;
    1744                 :          3 :                 struct cgroup *scgrp = &src_root->cgrp;
    1745                 :            :                 struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
    1746                 :            :                 struct css_set *cset;
    1747                 :            : 
    1748                 :          3 :                 WARN_ON(!css || cgroup_css(dcgrp, ss));
    1749                 :            : 
    1750                 :            :                 /* disable from the source */
    1751                 :          3 :                 src_root->subsys_mask &= ~(1 << ssid);
    1752                 :          3 :                 WARN_ON(cgroup_apply_control(scgrp));
    1753                 :          3 :                 cgroup_finalize_control(scgrp, 0);
    1754                 :            : 
    1755                 :            :                 /* rebind */
    1756                 :          3 :                 RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
    1757                 :          3 :                 rcu_assign_pointer(dcgrp->subsys[ssid], css);
    1758                 :          3 :                 ss->root = dst_root;
    1759                 :          3 :                 css->cgroup = dcgrp;
    1760                 :            : 
    1761                 :            :                 spin_lock_irq(&css_set_lock);
    1762                 :          3 :                 hash_for_each(css_set_table, i, cset, hlist)
    1763                 :          3 :                         list_move_tail(&cset->e_cset_node[ss->id],
    1764                 :            :                                        &dcgrp->e_csets[ss->id]);
    1765                 :            :                 spin_unlock_irq(&css_set_lock);
    1766                 :            : 
    1767                 :            :                 /* default hierarchy doesn't enable controllers by default */
    1768                 :          3 :                 dst_root->subsys_mask |= 1 << ssid;
    1769                 :          3 :                 if (dst_root == &cgrp_dfl_root) {
    1770                 :          0 :                         static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
    1771                 :            :                 } else {
    1772                 :          3 :                         dcgrp->subtree_control |= 1 << ssid;
    1773                 :          3 :                         static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
    1774                 :            :                 }
    1775                 :            : 
    1776                 :          3 :                 ret = cgroup_apply_control(dcgrp);
    1777                 :          3 :                 if (ret)
    1778                 :          0 :                         pr_warn("partial failure to rebind %s controller (err=%d)\n",
    1779                 :            :                                 ss->name, ret);
    1780                 :            : 
    1781                 :          3 :                 if (ss->bind)
    1782                 :          3 :                         ss->bind(css);
    1783                 :            :         } while_each_subsys_mask();
    1784                 :            : 
    1785                 :          3 :         kernfs_activate(dcgrp->kn);
    1786                 :          3 :         return 0;
    1787                 :            : }
    1788                 :            : 
    1789                 :          3 : int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
    1790                 :            :                      struct kernfs_root *kf_root)
    1791                 :            : {
    1792                 :            :         int len = 0;
    1793                 :            :         char *buf = NULL;
    1794                 :            :         struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
    1795                 :            :         struct cgroup *ns_cgroup;
    1796                 :            : 
    1797                 :            :         buf = kmalloc(PATH_MAX, GFP_KERNEL);
    1798                 :          3 :         if (!buf)
    1799                 :            :                 return -ENOMEM;
    1800                 :            : 
    1801                 :            :         spin_lock_irq(&css_set_lock);
    1802                 :          3 :         ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
    1803                 :          3 :         len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
    1804                 :            :         spin_unlock_irq(&css_set_lock);
    1805                 :            : 
    1806                 :          3 :         if (len >= PATH_MAX)
    1807                 :            :                 len = -ERANGE;
    1808                 :          3 :         else if (len > 0) {
    1809                 :          3 :                 seq_escape(sf, buf, " \t\n\\");
    1810                 :            :                 len = 0;
    1811                 :            :         }
    1812                 :          3 :         kfree(buf);
    1813                 :          3 :         return len;
    1814                 :            : }
    1815                 :            : 
    1816                 :            : enum cgroup2_param {
    1817                 :            :         Opt_nsdelegate,
    1818                 :            :         Opt_memory_localevents,
    1819                 :            :         nr__cgroup2_params
    1820                 :            : };
    1821                 :            : 
    1822                 :            : static const struct fs_parameter_spec cgroup2_param_specs[] = {
    1823                 :            :         fsparam_flag("nsdelegate",            Opt_nsdelegate),
    1824                 :            :         fsparam_flag("memory_localevents",    Opt_memory_localevents),
    1825                 :            :         {}
    1826                 :            : };
    1827                 :            : 
    1828                 :            : static const struct fs_parameter_description cgroup2_fs_parameters = {
    1829                 :            :         .name           = "cgroup2",
    1830                 :            :         .specs          = cgroup2_param_specs,
    1831                 :            : };
    1832                 :            : 
    1833                 :          3 : static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
    1834                 :            : {
    1835                 :            :         struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
    1836                 :            :         struct fs_parse_result result;
    1837                 :            :         int opt;
    1838                 :            : 
    1839                 :          3 :         opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
    1840                 :          3 :         if (opt < 0)
    1841                 :            :                 return opt;
    1842                 :            : 
    1843                 :          3 :         switch (opt) {
    1844                 :            :         case Opt_nsdelegate:
    1845                 :          3 :                 ctx->flags |= CGRP_ROOT_NS_DELEGATE;
    1846                 :          3 :                 return 0;
    1847                 :            :         case Opt_memory_localevents:
    1848                 :          0 :                 ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
    1849                 :          0 :                 return 0;
    1850                 :            :         }
    1851                 :            :         return -EINVAL;
    1852                 :            : }
    1853                 :            : 
    1854                 :          3 : static void apply_cgroup_root_flags(unsigned int root_flags)
    1855                 :            : {
    1856                 :          3 :         if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
    1857                 :          3 :                 if (root_flags & CGRP_ROOT_NS_DELEGATE)
    1858                 :          3 :                         cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
    1859                 :            :                 else
    1860                 :          0 :                         cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
    1861                 :            : 
    1862                 :          3 :                 if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
    1863                 :          0 :                         cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
    1864                 :            :                 else
    1865                 :          3 :                         cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
    1866                 :            :         }
    1867                 :          3 : }
    1868                 :            : 
    1869                 :          3 : static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
    1870                 :            : {
    1871                 :          3 :         if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
    1872                 :          3 :                 seq_puts(seq, ",nsdelegate");
    1873                 :          3 :         if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
    1874                 :          0 :                 seq_puts(seq, ",memory_localevents");
    1875                 :          3 :         return 0;
    1876                 :            : }
    1877                 :            : 
    1878                 :          0 : static int cgroup_reconfigure(struct fs_context *fc)
    1879                 :            : {
    1880                 :            :         struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
    1881                 :            : 
    1882                 :          0 :         apply_cgroup_root_flags(ctx->flags);
    1883                 :          0 :         return 0;
    1884                 :            : }
    1885                 :            : 
    1886                 :            : /*
    1887                 :            :  * To reduce the fork() overhead for systems that are not actually using
    1888                 :            :  * their cgroups capability, we don't maintain the lists running through
    1889                 :            :  * each css_set to its tasks until we see the list actually used - in other
    1890                 :            :  * words after the first mount.
    1891                 :            :  */
    1892                 :            : static bool use_task_css_set_links __read_mostly;
    1893                 :            : 
    1894                 :          3 : void cgroup_enable_task_cg_lists(void)
    1895                 :            : {
    1896                 :            :         struct task_struct *p, *g;
    1897                 :            : 
    1898                 :            :         /*
    1899                 :            :          * We need tasklist_lock because RCU is not safe against
    1900                 :            :          * while_each_thread(). Besides, a forking task that has passed
    1901                 :            :          * cgroup_post_fork() without seeing use_task_css_set_links = 1
    1902                 :            :          * is not guaranteed to have its child immediately visible in the
    1903                 :            :          * tasklist if we walk through it with RCU.
    1904                 :            :          */
    1905                 :          3 :         read_lock(&tasklist_lock);
    1906                 :            :         spin_lock_irq(&css_set_lock);
    1907                 :            : 
    1908                 :          3 :         if (use_task_css_set_links)
    1909                 :            :                 goto out_unlock;
    1910                 :            : 
    1911                 :          3 :         use_task_css_set_links = true;
    1912                 :            : 
    1913                 :          3 :         do_each_thread(g, p) {
    1914                 :          3 :                 WARN_ON_ONCE(!list_empty(&p->cg_list) ||
    1915                 :            :                              task_css_set(p) != &init_css_set);
    1916                 :            : 
    1917                 :            :                 /*
    1918                 :            :                  * We should check if the process is exiting, otherwise
    1919                 :            :                  * it will race with cgroup_exit() in that the list
    1920                 :            :                  * entry won't be deleted though the process has exited.
    1921                 :            :                  * Do it while holding siglock so that we don't end up
    1922                 :            :                  * racing against cgroup_exit().
    1923                 :            :                  *
    1924                 :            :                  * Interrupts were already disabled while acquiring
    1925                 :            :                  * the css_set_lock, so we do not need to disable it
    1926                 :            :                  * again when acquiring the sighand->siglock here.
    1927                 :            :                  */
    1928                 :          3 :                 spin_lock(&p->sighand->siglock);
    1929                 :          3 :                 if (!(p->flags & PF_EXITING)) {
    1930                 :            :                         struct css_set *cset = task_css_set(p);
    1931                 :            : 
    1932                 :          3 :                         if (!css_set_populated(cset))
    1933                 :          3 :                                 css_set_update_populated(cset, true);
    1934                 :            :                         list_add_tail(&p->cg_list, &cset->tasks);
    1935                 :            :                         get_css_set(cset);
    1936                 :          3 :                         cset->nr_tasks++;
    1937                 :            :                 }
    1938                 :          3 :                 spin_unlock(&p->sighand->siglock);
    1939                 :          3 :         } while_each_thread(g, p);
    1940                 :            : out_unlock:
    1941                 :            :         spin_unlock_irq(&css_set_lock);
    1942                 :            :         read_unlock(&tasklist_lock);
    1943                 :          3 : }
    1944                 :            : 
    1945                 :          3 : static void init_cgroup_housekeeping(struct cgroup *cgrp)
    1946                 :            : {
    1947                 :            :         struct cgroup_subsys *ss;
    1948                 :            :         int ssid;
    1949                 :            : 
    1950                 :          3 :         INIT_LIST_HEAD(&cgrp->self.sibling);
    1951                 :          3 :         INIT_LIST_HEAD(&cgrp->self.children);
    1952                 :          3 :         INIT_LIST_HEAD(&cgrp->cset_links);
    1953                 :          3 :         INIT_LIST_HEAD(&cgrp->pidlists);
    1954                 :          3 :         mutex_init(&cgrp->pidlist_mutex);
    1955                 :          3 :         cgrp->self.cgroup = cgrp;
    1956                 :          3 :         cgrp->self.flags |= CSS_ONLINE;
    1957                 :          3 :         cgrp->dom_cgrp = cgrp;
    1958                 :          3 :         cgrp->max_descendants = INT_MAX;
    1959                 :          3 :         cgrp->max_depth = INT_MAX;
    1960                 :          3 :         INIT_LIST_HEAD(&cgrp->rstat_css_list);
    1961                 :            :         prev_cputime_init(&cgrp->prev_cputime);
    1962                 :            : 
    1963                 :          3 :         for_each_subsys(ss, ssid)
    1964                 :          3 :                 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
    1965                 :            : 
    1966                 :          3 :         init_waitqueue_head(&cgrp->offline_waitq);
    1967                 :          3 :         INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
    1968                 :          3 : }
    1969                 :            : 
    1970                 :          3 : void init_cgroup_root(struct cgroup_fs_context *ctx)
    1971                 :            : {
    1972                 :          3 :         struct cgroup_root *root = ctx->root;
    1973                 :          3 :         struct cgroup *cgrp = &root->cgrp;
    1974                 :            : 
    1975                 :          3 :         INIT_LIST_HEAD(&root->root_list);
    1976                 :            :         atomic_set(&root->nr_cgrps, 1);
    1977                 :          3 :         cgrp->root = root;
    1978                 :          3 :         init_cgroup_housekeeping(cgrp);
    1979                 :            :         idr_init(&root->cgroup_idr);
    1980                 :            : 
    1981                 :          3 :         root->flags = ctx->flags;
    1982                 :          3 :         if (ctx->release_agent)
    1983                 :          0 :                 strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
    1984                 :          3 :         if (ctx->name)
    1985                 :          3 :                 strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
    1986                 :          3 :         if (ctx->cpuset_clone_children)
    1987                 :          0 :                 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
    1988                 :          3 : }
    1989                 :            : 
    1990                 :          3 : int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
    1991                 :            : {
    1992                 :          3 :         LIST_HEAD(tmp_links);
    1993                 :          3 :         struct cgroup *root_cgrp = &root->cgrp;
    1994                 :            :         struct kernfs_syscall_ops *kf_sops;
    1995                 :            :         struct css_set *cset;
    1996                 :            :         int i, ret;
    1997                 :            : 
    1998                 :            :         lockdep_assert_held(&cgroup_mutex);
    1999                 :            : 
    2000                 :          3 :         ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
    2001                 :          3 :         if (ret < 0)
    2002                 :            :                 goto out;
    2003                 :          3 :         root_cgrp->id = ret;
    2004                 :          3 :         root_cgrp->ancestor_ids[0] = ret;
    2005                 :            : 
    2006                 :          3 :         ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
    2007                 :            :                               0, GFP_KERNEL);
    2008                 :          3 :         if (ret)
    2009                 :            :                 goto out;
    2010                 :            : 
    2011                 :            :         /*
    2012                 :            :          * We're accessing css_set_count without locking css_set_lock here,
    2013                 :            :          * but that's OK - it can only be increased by someone holding
    2014                 :            :          * cgroup_lock, and that's us.  Later rebinding may disable
    2015                 :            :          * controllers on the default hierarchy and thus create new csets,
    2016                 :            :          * which can't be more than the existing ones.  Allocate 2x.
    2017                 :            :          */
    2018                 :          3 :         ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
    2019                 :          3 :         if (ret)
    2020                 :            :                 goto cancel_ref;
    2021                 :            : 
    2022                 :          3 :         ret = cgroup_init_root_id(root);
    2023                 :          3 :         if (ret)
    2024                 :            :                 goto cancel_ref;
    2025                 :            : 
    2026                 :            :         kf_sops = root == &cgrp_dfl_root ?
    2027                 :          3 :                 &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
    2028                 :            : 
    2029                 :          3 :         root->kf_root = kernfs_create_root(kf_sops,
    2030                 :            :                                            KERNFS_ROOT_CREATE_DEACTIVATED |
    2031                 :            :                                            KERNFS_ROOT_SUPPORT_EXPORTOP,
    2032                 :            :                                            root_cgrp);
    2033                 :          3 :         if (IS_ERR(root->kf_root)) {
    2034                 :            :                 ret = PTR_ERR(root->kf_root);
    2035                 :          0 :                 goto exit_root_id;
    2036                 :            :         }
    2037                 :          3 :         root_cgrp->kn = root->kf_root->kn;
    2038                 :            : 
    2039                 :          3 :         ret = css_populate_dir(&root_cgrp->self);
    2040                 :          3 :         if (ret)
    2041                 :            :                 goto destroy_root;
    2042                 :            : 
    2043                 :          3 :         ret = rebind_subsystems(root, ss_mask);
    2044                 :          3 :         if (ret)
    2045                 :            :                 goto destroy_root;
    2046                 :            : 
    2047                 :          3 :         ret = cgroup_bpf_inherit(root_cgrp);
    2048                 :          3 :         WARN_ON_ONCE(ret);
    2049                 :            : 
    2050                 :          3 :         trace_cgroup_setup_root(root);
    2051                 :            : 
    2052                 :            :         /*
    2053                 :            :          * There must be no failure case after here, since rebinding takes
    2054                 :            :          * care of subsystems' refcounts, which are explicitly dropped in
    2055                 :            :          * the failure exit path.
    2056                 :            :          */
    2057                 :          3 :         list_add(&root->root_list, &cgroup_roots);
    2058                 :          3 :         cgroup_root_count++;
    2059                 :            : 
    2060                 :            :         /*
    2061                 :            :          * Link the root cgroup in this hierarchy into all the css_set
    2062                 :            :          * objects.
    2063                 :            :          */
    2064                 :            :         spin_lock_irq(&css_set_lock);
    2065                 :          3 :         hash_for_each(css_set_table, i, cset, hlist) {
    2066                 :          3 :                 link_css_set(&tmp_links, cset, root_cgrp);
    2067                 :          3 :                 if (css_set_populated(cset))
    2068                 :          3 :                         cgroup_update_populated(root_cgrp, true);
    2069                 :            :         }
    2070                 :            :         spin_unlock_irq(&css_set_lock);
    2071                 :            : 
    2072                 :          3 :         BUG_ON(!list_empty(&root_cgrp->self.children));
    2073                 :          3 :         BUG_ON(atomic_read(&root->nr_cgrps) != 1);
    2074                 :            : 
    2075                 :          3 :         kernfs_activate(root_cgrp->kn);
    2076                 :            :         ret = 0;
    2077                 :          3 :         goto out;
    2078                 :            : 
    2079                 :            : destroy_root:
    2080                 :          0 :         kernfs_destroy_root(root->kf_root);
    2081                 :          0 :         root->kf_root = NULL;
    2082                 :            : exit_root_id:
    2083                 :            :         cgroup_exit_root_id(root);
    2084                 :            : cancel_ref:
    2085                 :          0 :         percpu_ref_exit(&root_cgrp->self.refcnt);
    2086                 :            : out:
    2087                 :          3 :         free_cgrp_cset_links(&tmp_links);
    2088                 :          3 :         return ret;
    2089                 :            : }
    2090                 :            : 
    2091                 :          3 : int cgroup_do_get_tree(struct fs_context *fc)
    2092                 :            : {
    2093                 :            :         struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
    2094                 :            :         int ret;
    2095                 :            : 
    2096                 :          3 :         ctx->kfc.root = ctx->root->kf_root;
    2097                 :          3 :         if (fc->fs_type == &cgroup2_fs_type)
    2098                 :          3 :                 ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
    2099                 :            :         else
    2100                 :          3 :                 ctx->kfc.magic = CGROUP_SUPER_MAGIC;
    2101                 :          3 :         ret = kernfs_get_tree(fc);
    2102                 :            : 
    2103                 :            :         /*
    2104                 :            :          * In non-init cgroup namespace, instead of root cgroup's dentry,
    2105                 :            :          * we return the dentry corresponding to the cgroupns->root_cgrp.
    2106                 :            :          */
    2107                 :          3 :         if (!ret && ctx->ns != &init_cgroup_ns) {
    2108                 :            :                 struct dentry *nsdentry;
    2109                 :          0 :                 struct super_block *sb = fc->root->d_sb;
    2110                 :            :                 struct cgroup *cgrp;
    2111                 :            : 
    2112                 :          0 :                 mutex_lock(&cgroup_mutex);
    2113                 :            :                 spin_lock_irq(&css_set_lock);
    2114                 :            : 
    2115                 :          0 :                 cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
    2116                 :            : 
    2117                 :            :                 spin_unlock_irq(&css_set_lock);
    2118                 :          0 :                 mutex_unlock(&cgroup_mutex);
    2119                 :            : 
    2120                 :          0 :                 nsdentry = kernfs_node_dentry(cgrp->kn, sb);
    2121                 :          0 :                 dput(fc->root);
    2122                 :          0 :                 if (IS_ERR(nsdentry)) {
    2123                 :          0 :                         deactivate_locked_super(sb);
    2124                 :            :                         ret = PTR_ERR(nsdentry);
    2125                 :            :                         nsdentry = NULL;
    2126                 :            :                 }
    2127                 :          0 :                 fc->root = nsdentry;
    2128                 :            :         }
    2129                 :            : 
    2130                 :          3 :         if (!ctx->kfc.new_sb_created)
    2131                 :          0 :                 cgroup_put(&ctx->root->cgrp);
    2132                 :            : 
    2133                 :          3 :         return ret;
    2134                 :            : }
    2135                 :            : 
    2136                 :            : /*
    2137                 :            :  * Destroy a cgroup filesystem context.
    2138                 :            :  */
    2139                 :          3 : static void cgroup_fs_context_free(struct fs_context *fc)
    2140                 :            : {
    2141                 :            :         struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
    2142                 :            : 
    2143                 :          3 :         kfree(ctx->name);
    2144                 :          3 :         kfree(ctx->release_agent);
    2145                 :          3 :         put_cgroup_ns(ctx->ns);
    2146                 :          3 :         kernfs_free_fs_context(fc);
    2147                 :          3 :         kfree(ctx);
    2148                 :          3 : }
    2149                 :            : 
    2150                 :          3 : static int cgroup_get_tree(struct fs_context *fc)
    2151                 :            : {
    2152                 :            :         struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
    2153                 :            :         int ret;
    2154                 :            : 
    2155                 :          3 :         cgrp_dfl_visible = true;
    2156                 :          3 :         cgroup_get_live(&cgrp_dfl_root.cgrp);
    2157                 :          3 :         ctx->root = &cgrp_dfl_root;
    2158                 :            : 
    2159                 :          3 :         ret = cgroup_do_get_tree(fc);
    2160                 :          3 :         if (!ret)
    2161                 :          3 :                 apply_cgroup_root_flags(ctx->flags);
    2162                 :          3 :         return ret;
    2163                 :            : }
    2164                 :            : 
    2165                 :            : static const struct fs_context_operations cgroup_fs_context_ops = {
    2166                 :            :         .free           = cgroup_fs_context_free,
    2167                 :            :         .parse_param    = cgroup2_parse_param,
    2168                 :            :         .get_tree       = cgroup_get_tree,
    2169                 :            :         .reconfigure    = cgroup_reconfigure,
    2170                 :            : };
    2171                 :            : 
    2172                 :            : static const struct fs_context_operations cgroup1_fs_context_ops = {
    2173                 :            :         .free           = cgroup_fs_context_free,
    2174                 :            :         .parse_param    = cgroup1_parse_param,
    2175                 :            :         .get_tree       = cgroup1_get_tree,
    2176                 :            :         .reconfigure    = cgroup1_reconfigure,
    2177                 :            : };
    2178                 :            : 
    2179                 :            : /*
    2180                 :            :  * Initialise the cgroup filesystem creation/reconfiguration context.  Notably,
    2181                 :            :  * we select the namespace we're going to use.
    2182                 :            :  */
    2183                 :          3 : static int cgroup_init_fs_context(struct fs_context *fc)
    2184                 :            : {
    2185                 :            :         struct cgroup_fs_context *ctx;
    2186                 :            : 
    2187                 :          3 :         ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
    2188                 :          3 :         if (!ctx)
    2189                 :            :                 return -ENOMEM;
    2190                 :            : 
    2191                 :            :         /*
    2192                 :            :          * The first time anyone tries to mount a cgroup, enable the list
    2193                 :            :          * linking each css_set to its tasks and fix up all existing tasks.
    2194                 :            :          */
    2195                 :          3 :         if (!use_task_css_set_links)
    2196                 :          3 :                 cgroup_enable_task_cg_lists();
    2197                 :            : 
    2198                 :          3 :         ctx->ns = current->nsproxy->cgroup_ns;
    2199                 :            :         get_cgroup_ns(ctx->ns);
    2200                 :          3 :         fc->fs_private = &ctx->kfc;
    2201                 :          3 :         if (fc->fs_type == &cgroup2_fs_type)
    2202                 :          3 :                 fc->ops = &cgroup_fs_context_ops;
    2203                 :            :         else
    2204                 :          3 :                 fc->ops = &cgroup1_fs_context_ops;
    2205                 :          3 :         put_user_ns(fc->user_ns);
    2206                 :          3 :         fc->user_ns = get_user_ns(ctx->ns->user_ns);
    2207                 :          3 :         fc->global = true;
    2208                 :          3 :         return 0;
    2209                 :            : }
    2210                 :            : 
    2211                 :          0 : static void cgroup_kill_sb(struct super_block *sb)
    2212                 :            : {
    2213                 :          0 :         struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
    2214                 :            :         struct cgroup_root *root = cgroup_root_from_kf(kf_root);
    2215                 :            : 
    2216                 :            :         /*
    2217                 :            :          * If @root doesn't have any children, start killing it.
    2218                 :            :          * This prevents new mounts by disabling percpu_ref_tryget_live().
    2219                 :            :          * cgroup_mount() may wait for @root's release.
    2220                 :            :          *
    2221                 :            :          * And don't kill the default root.
    2222                 :            :          */
    2223                 :          0 :         if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
    2224                 :            :             !percpu_ref_is_dying(&root->cgrp.self.refcnt))
    2225                 :          0 :                 percpu_ref_kill(&root->cgrp.self.refcnt);
    2226                 :            :         cgroup_put(&root->cgrp);
    2227                 :          0 :         kernfs_kill_sb(sb);
    2228                 :          0 : }
    2229                 :            : 
    2230                 :            : struct file_system_type cgroup_fs_type = {
    2231                 :            :         .name                   = "cgroup",
    2232                 :            :         .init_fs_context        = cgroup_init_fs_context,
    2233                 :            :         .parameters             = &cgroup1_fs_parameters,
    2234                 :            :         .kill_sb                = cgroup_kill_sb,
    2235                 :            :         .fs_flags               = FS_USERNS_MOUNT,
    2236                 :            : };
    2237                 :            : 
    2238                 :            : static struct file_system_type cgroup2_fs_type = {
    2239                 :            :         .name                   = "cgroup2",
    2240                 :            :         .init_fs_context        = cgroup_init_fs_context,
    2241                 :            :         .parameters             = &cgroup2_fs_parameters,
    2242                 :            :         .kill_sb                = cgroup_kill_sb,
    2243                 :            :         .fs_flags               = FS_USERNS_MOUNT,
    2244                 :            : };
    2245                 :            : 
    2246                 :            : #ifdef CONFIG_CPUSETS
    2247                 :            : static const struct fs_context_operations cpuset_fs_context_ops = {
    2248                 :            :         .get_tree       = cgroup1_get_tree,
    2249                 :            :         .free           = cgroup_fs_context_free,
    2250                 :            : };
    2251                 :            : 
    2252                 :            : /*
    2253                 :            :  * This is ugly, but preserves the userspace API for existing cpuset
    2254                 :            :  * users. If someone tries to mount the "cpuset" filesystem, we
    2255                 :            :  * silently switch it to mount "cgroup" instead
    2256                 :            :  */
    2257                 :          0 : static int cpuset_init_fs_context(struct fs_context *fc)
    2258                 :            : {
    2259                 :          0 :         char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
    2260                 :            :         struct cgroup_fs_context *ctx;
    2261                 :            :         int err;
    2262                 :            : 
    2263                 :          0 :         err = cgroup_init_fs_context(fc);
    2264                 :          0 :         if (err) {
    2265                 :          0 :                 kfree(agent);
    2266                 :          0 :                 return err;
    2267                 :            :         }
    2268                 :            : 
    2269                 :          0 :         fc->ops = &cpuset_fs_context_ops;
    2270                 :            : 
    2271                 :            :         ctx = cgroup_fc2context(fc);
    2272                 :          0 :         ctx->subsys_mask = 1 << cpuset_cgrp_id;
    2273                 :          0 :         ctx->flags |= CGRP_ROOT_NOPREFIX;
    2274                 :          0 :         ctx->release_agent = agent;
    2275                 :            : 
    2276                 :          0 :         get_filesystem(&cgroup_fs_type);
    2277                 :          0 :         put_filesystem(fc->fs_type);
    2278                 :          0 :         fc->fs_type = &cgroup_fs_type;
    2279                 :            : 
    2280                 :          0 :         return 0;
    2281                 :            : }
    2282                 :            : 
    2283                 :            : static struct file_system_type cpuset_fs_type = {
    2284                 :            :         .name                   = "cpuset",
    2285                 :            :         .init_fs_context        = cpuset_init_fs_context,
    2286                 :            :         .fs_flags               = FS_USERNS_MOUNT,
    2287                 :            : };
    2288                 :            : #endif
    2289                 :            : 
    2290                 :          3 : int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
    2291                 :            :                           struct cgroup_namespace *ns)
    2292                 :            : {
    2293                 :          3 :         struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
    2294                 :            : 
    2295                 :          3 :         return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
    2296                 :            : }
    2297                 :            : 
    2298                 :          0 : int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
    2299                 :            :                    struct cgroup_namespace *ns)
    2300                 :            : {
    2301                 :            :         int ret;
    2302                 :            : 
    2303                 :          0 :         mutex_lock(&cgroup_mutex);
    2304                 :            :         spin_lock_irq(&css_set_lock);
    2305                 :            : 
    2306                 :          0 :         ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
    2307                 :            : 
    2308                 :            :         spin_unlock_irq(&css_set_lock);
    2309                 :          0 :         mutex_unlock(&cgroup_mutex);
    2310                 :            : 
    2311                 :          0 :         return ret;
    2312                 :            : }
    2313                 :            : EXPORT_SYMBOL_GPL(cgroup_path_ns);
    2314                 :            : 
    2315                 :            : /**
    2316                 :            :  * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
    2317                 :            :  * @task: target task
    2318                 :            :  * @buf: the buffer to write the path into
    2319                 :            :  * @buflen: the length of the buffer
    2320                 :            :  *
    2321                 :            :  * Determine @task's cgroup on the first (the one with the lowest non-zero
    2322                 :            :  * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
    2323                 :            :  * function grabs cgroup_mutex and shouldn't be used inside locks used by
    2324                 :            :  * cgroup controller callbacks.
    2325                 :            :  *
    2326                 :            :  * Return value is the same as kernfs_path().
    2327                 :            :  */
    2328                 :          0 : int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
    2329                 :            : {
    2330                 :            :         struct cgroup_root *root;
    2331                 :            :         struct cgroup *cgrp;
    2332                 :          0 :         int hierarchy_id = 1;
    2333                 :            :         int ret;
    2334                 :            : 
    2335                 :          0 :         mutex_lock(&cgroup_mutex);
    2336                 :            :         spin_lock_irq(&css_set_lock);
    2337                 :            : 
    2338                 :          0 :         root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
    2339                 :            : 
    2340                 :          0 :         if (root) {
    2341                 :            :                 cgrp = task_cgroup_from_root(task, root);
    2342                 :          0 :                 ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
    2343                 :            :         } else {
    2344                 :            :                 /* if no hierarchy exists, everyone is in "/" */
    2345                 :          0 :                 ret = strlcpy(buf, "/", buflen);
    2346                 :            :         }
    2347                 :            : 
    2348                 :            :         spin_unlock_irq(&css_set_lock);
    2349                 :          0 :         mutex_unlock(&cgroup_mutex);
    2350                 :          0 :         return ret;
    2351                 :            : }
    2352                 :            : EXPORT_SYMBOL_GPL(task_cgroup_path);
    2353                 :            : 
    2354                 :            : /**
    2355                 :            :  * cgroup_migrate_add_task - add a migration target task to a migration context
    2356                 :            :  * @task: target task
    2357                 :            :  * @mgctx: target migration context
    2358                 :            :  *
    2359                 :            :  * Add @task, which is a migration target, to @mgctx->tset.  This function
    2360                 :            :  * becomes noop if @task doesn't need to be migrated.  @task's css_set
    2361                 :            :  * should have been added as a migration source and @task->cg_list will be
    2362                 :            :  * moved from the css_set's tasks list to mg_tasks one.
    2363                 :            :  */
    2364                 :          3 : static void cgroup_migrate_add_task(struct task_struct *task,
    2365                 :            :                                     struct cgroup_mgctx *mgctx)
    2366                 :            : {
    2367                 :            :         struct css_set *cset;
    2368                 :            : 
    2369                 :            :         lockdep_assert_held(&css_set_lock);
    2370                 :            : 
    2371                 :            :         /* @task either already exited or can't exit until the end */
    2372                 :          3 :         if (task->flags & PF_EXITING)
    2373                 :            :                 return;
    2374                 :            : 
    2375                 :            :         /* leave @task alone if post_fork() hasn't linked it yet */
    2376                 :          3 :         if (list_empty(&task->cg_list))
    2377                 :            :                 return;
    2378                 :            : 
    2379                 :            :         cset = task_css_set(task);
    2380                 :          3 :         if (!cset->mg_src_cgrp)
    2381                 :            :                 return;
    2382                 :            : 
    2383                 :          3 :         mgctx->tset.nr_tasks++;
    2384                 :            : 
    2385                 :          3 :         list_move_tail(&task->cg_list, &cset->mg_tasks);
    2386                 :          3 :         if (list_empty(&cset->mg_node))
    2387                 :          3 :                 list_add_tail(&cset->mg_node,
    2388                 :            :                               &mgctx->tset.src_csets);
    2389                 :          3 :         if (list_empty(&cset->mg_dst_cset->mg_node))
    2390                 :          3 :                 list_add_tail(&cset->mg_dst_cset->mg_node,
    2391                 :            :                               &mgctx->tset.dst_csets);
    2392                 :            : }
    2393                 :            : 
    2394                 :            : /**
    2395                 :            :  * cgroup_taskset_first - reset taskset and return the first task
    2396                 :            :  * @tset: taskset of interest
    2397                 :            :  * @dst_cssp: output variable for the destination css
    2398                 :            :  *
    2399                 :            :  * @tset iteration is initialized and the first task is returned.
    2400                 :            :  */
    2401                 :          3 : struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
    2402                 :            :                                          struct cgroup_subsys_state **dst_cssp)
    2403                 :            : {
    2404                 :          3 :         tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
    2405                 :          3 :         tset->cur_task = NULL;
    2406                 :            : 
    2407                 :          3 :         return cgroup_taskset_next(tset, dst_cssp);
    2408                 :            : }
    2409                 :            : 
    2410                 :            : /**
    2411                 :            :  * cgroup_taskset_next - iterate to the next task in taskset
    2412                 :            :  * @tset: taskset of interest
    2413                 :            :  * @dst_cssp: output variable for the destination css
    2414                 :            :  *
    2415                 :            :  * Return the next task in @tset.  Iteration must have been initialized
    2416                 :            :  * with cgroup_taskset_first().
    2417                 :            :  */
    2418                 :          3 : struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
    2419                 :            :                                         struct cgroup_subsys_state **dst_cssp)
    2420                 :            : {
    2421                 :          3 :         struct css_set *cset = tset->cur_cset;
    2422                 :          3 :         struct task_struct *task = tset->cur_task;
    2423                 :            : 
    2424                 :          3 :         while (&cset->mg_node != tset->csets) {
    2425                 :          3 :                 if (!task)
    2426                 :          3 :                         task = list_first_entry(&cset->mg_tasks,
    2427                 :            :                                                 struct task_struct, cg_list);
    2428                 :            :                 else
    2429                 :          3 :                         task = list_next_entry(task, cg_list);
    2430                 :            : 
    2431                 :          3 :                 if (&task->cg_list != &cset->mg_tasks) {
    2432                 :          3 :                         tset->cur_cset = cset;
    2433                 :          3 :                         tset->cur_task = task;
    2434                 :            : 
    2435                 :            :                         /*
    2436                 :            :                          * This function may be called both before and
    2437                 :            :                          * after cgroup_taskset_migrate().  The two cases
    2438                 :            :                          * can be distinguished by looking at whether @cset
    2439                 :            :                          * has its ->mg_dst_cset set.
    2440                 :            :                          */
    2441                 :          3 :                         if (cset->mg_dst_cset)
    2442                 :          3 :                                 *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
    2443                 :            :                         else
    2444                 :          0 :                                 *dst_cssp = cset->subsys[tset->ssid];
    2445                 :            : 
    2446                 :          3 :                         return task;
    2447                 :            :                 }
    2448                 :            : 
    2449                 :          3 :                 cset = list_next_entry(cset, mg_node);
    2450                 :            :                 task = NULL;
    2451                 :            :         }
    2452                 :            : 
    2453                 :            :         return NULL;
    2454                 :            : }
    2455                 :            : 
    2456                 :            : /**
    2457                 :            :  * cgroup_taskset_migrate - migrate a taskset
    2458                 :            :  * @mgctx: migration context
    2459                 :            :  *
    2460                 :            :  * Migrate tasks in @mgctx as setup by migration preparation functions.
    2461                 :            :  * This function fails iff one of the ->can_attach callbacks fails and
    2462                 :            :  * guarantees that either all or none of the tasks in @mgctx are migrated.
    2463                 :            :  * @mgctx is consumed regardless of success.
    2464                 :            :  */
    2465                 :          3 : static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
    2466                 :            : {
    2467                 :          3 :         struct cgroup_taskset *tset = &mgctx->tset;
    2468                 :            :         struct cgroup_subsys *ss;
    2469                 :            :         struct task_struct *task, *tmp_task;
    2470                 :            :         struct css_set *cset, *tmp_cset;
    2471                 :            :         int ssid, failed_ssid, ret;
    2472                 :            : 
    2473                 :            :         /* check that we can legitimately attach to the cgroup */
    2474                 :          3 :         if (tset->nr_tasks) {
    2475                 :          3 :                 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
    2476                 :          3 :                         if (ss->can_attach) {
    2477                 :          3 :                                 tset->ssid = ssid;
    2478                 :          3 :                                 ret = ss->can_attach(tset);
    2479                 :          3 :                                 if (ret) {
    2480                 :          0 :                                         failed_ssid = ssid;
    2481                 :          0 :                                         goto out_cancel_attach;
    2482                 :            :                                 }
    2483                 :            :                         }
    2484                 :            :                 } while_each_subsys_mask();
    2485                 :            :         }
    2486                 :            : 
    2487                 :            :         /*
    2488                 :            :          * Now that we're guaranteed success, proceed to move all tasks to
    2489                 :            :          * the new cgroup.  There are no failure cases after here, so this
    2490                 :            :          * is the commit point.
    2491                 :            :          */
    2492                 :            :         spin_lock_irq(&css_set_lock);
    2493                 :          3 :         list_for_each_entry(cset, &tset->src_csets, mg_node) {
    2494                 :          3 :                 list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
    2495                 :            :                         struct css_set *from_cset = task_css_set(task);
    2496                 :          3 :                         struct css_set *to_cset = cset->mg_dst_cset;
    2497                 :            : 
    2498                 :            :                         get_css_set(to_cset);
    2499                 :          3 :                         to_cset->nr_tasks++;
    2500                 :          3 :                         css_set_move_task(task, from_cset, to_cset, true);
    2501                 :          3 :                         from_cset->nr_tasks--;
    2502                 :            :                         /*
    2503                 :            :                          * If the source or destination cgroup is frozen,
    2504                 :            :                          * the task might require to change its state.
    2505                 :            :                          */
    2506                 :          3 :                         cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
    2507                 :            :                                                     to_cset->dfl_cgrp);
    2508                 :          3 :                         put_css_set_locked(from_cset);
    2509                 :            : 
    2510                 :            :                 }
    2511                 :            :         }
    2512                 :            :         spin_unlock_irq(&css_set_lock);
    2513                 :            : 
    2514                 :            :         /*
    2515                 :            :          * Migration is committed, all target tasks are now on dst_csets.
    2516                 :            :          * Nothing is sensitive to fork() after this point.  Notify
    2517                 :            :          * controllers that migration is complete.
    2518                 :            :          */
    2519                 :          3 :         tset->csets = &tset->dst_csets;
    2520                 :            : 
    2521                 :          3 :         if (tset->nr_tasks) {
    2522                 :          3 :                 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
    2523                 :          3 :                         if (ss->attach) {
    2524                 :          0 :                                 tset->ssid = ssid;
    2525                 :          0 :                                 ss->attach(tset);
    2526                 :            :                         }
    2527                 :            :                 } while_each_subsys_mask();
    2528                 :            :         }
    2529                 :            : 
    2530                 :            :         ret = 0;
    2531                 :            :         goto out_release_tset;
    2532                 :            : 
    2533                 :            : out_cancel_attach:
    2534                 :          0 :         if (tset->nr_tasks) {
    2535                 :          0 :                 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
    2536                 :          0 :                         if (ssid == failed_ssid)
    2537                 :            :                                 break;
    2538                 :          0 :                         if (ss->cancel_attach) {
    2539                 :          0 :                                 tset->ssid = ssid;
    2540                 :          0 :                                 ss->cancel_attach(tset);
    2541                 :            :                         }
    2542                 :            :                 } while_each_subsys_mask();
    2543                 :            :         }
    2544                 :            : out_release_tset:
    2545                 :            :         spin_lock_irq(&css_set_lock);
    2546                 :          3 :         list_splice_init(&tset->dst_csets, &tset->src_csets);
    2547                 :          3 :         list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
    2548                 :          3 :                 list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
    2549                 :            :                 list_del_init(&cset->mg_node);
    2550                 :            :         }
    2551                 :            :         spin_unlock_irq(&css_set_lock);
    2552                 :            : 
    2553                 :            :         /*
    2554                 :            :          * Re-initialize the cgroup_taskset structure in case it is reused
    2555                 :            :          * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
    2556                 :            :          * iteration.
    2557                 :            :          */
    2558                 :          3 :         tset->nr_tasks = 0;
    2559                 :          3 :         tset->csets    = &tset->src_csets;
    2560                 :          3 :         return ret;
    2561                 :            : }
    2562                 :            : 
    2563                 :            : /**
    2564                 :            :  * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
    2565                 :            :  * @dst_cgrp: destination cgroup to test
    2566                 :            :  *
    2567                 :            :  * On the default hierarchy, except for the mixable, (possible) thread root
    2568                 :            :  * and threaded cgroups, subtree_control must be zero for migration
    2569                 :            :  * destination cgroups with tasks so that child cgroups don't compete
    2570                 :            :  * against tasks.
    2571                 :            :  */
    2572                 :          3 : int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
    2573                 :            : {
    2574                 :            :         /* v1 doesn't have any restriction */
    2575                 :          3 :         if (!cgroup_on_dfl(dst_cgrp))
    2576                 :            :                 return 0;
    2577                 :            : 
    2578                 :            :         /* verify @dst_cgrp can host resources */
    2579                 :          3 :         if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
    2580                 :            :                 return -EOPNOTSUPP;
    2581                 :            : 
    2582                 :            :         /* mixables don't care */
    2583                 :          3 :         if (cgroup_is_mixable(dst_cgrp))
    2584                 :            :                 return 0;
    2585                 :            : 
    2586                 :            :         /*
    2587                 :            :          * If @dst_cgrp is already or can become a thread root or is
    2588                 :            :          * threaded, it doesn't matter.
    2589                 :            :          */
    2590                 :          3 :         if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
    2591                 :            :                 return 0;
    2592                 :            : 
    2593                 :            :         /* apply no-internal-process constraint */
    2594                 :          0 :         if (dst_cgrp->subtree_control)
    2595                 :            :                 return -EBUSY;
    2596                 :            : 
    2597                 :          0 :         return 0;
    2598                 :            : }
    2599                 :            : 
    2600                 :            : /**
    2601                 :            :  * cgroup_migrate_finish - cleanup after attach
    2602                 :            :  * @mgctx: migration context
    2603                 :            :  *
    2604                 :            :  * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
    2605                 :            :  * those functions for details.
    2606                 :            :  */
    2607                 :          3 : void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
    2608                 :            : {
    2609                 :          3 :         LIST_HEAD(preloaded);
    2610                 :            :         struct css_set *cset, *tmp_cset;
    2611                 :            : 
    2612                 :            :         lockdep_assert_held(&cgroup_mutex);
    2613                 :            : 
    2614                 :            :         spin_lock_irq(&css_set_lock);
    2615                 :            : 
    2616                 :          3 :         list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
    2617                 :          3 :         list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
    2618                 :            : 
    2619                 :          3 :         list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
    2620                 :          3 :                 cset->mg_src_cgrp = NULL;
    2621                 :          3 :                 cset->mg_dst_cgrp = NULL;
    2622                 :          3 :                 cset->mg_dst_cset = NULL;
    2623                 :            :                 list_del_init(&cset->mg_preload_node);
    2624                 :          3 :                 put_css_set_locked(cset);
    2625                 :            :         }
    2626                 :            : 
    2627                 :            :         spin_unlock_irq(&css_set_lock);
    2628                 :          3 : }
    2629                 :            : 
    2630                 :            : /**
    2631                 :            :  * cgroup_migrate_add_src - add a migration source css_set
    2632                 :            :  * @src_cset: the source css_set to add
    2633                 :            :  * @dst_cgrp: the destination cgroup
    2634                 :            :  * @mgctx: migration context
    2635                 :            :  *
    2636                 :            :  * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
    2637                 :            :  * @src_cset and add it to @mgctx->preloaded_src_csets, which should later
    2638                 :            :  * be cleaned up by cgroup_migrate_finish().
    2639                 :            :  *
    2640                 :            :  * This function may be called without holding cgroup_threadgroup_rwsem
    2641                 :            :  * even if the target is a process.  Threads may be created and destroyed
    2642                 :            :  * but as long as cgroup_mutex is not dropped, no new css_set can be put
    2643                 :            :  * into play and the preloaded css_sets are guaranteed to cover all
    2644                 :            :  * migrations.
    2645                 :            :  */
    2646                 :          3 : void cgroup_migrate_add_src(struct css_set *src_cset,
    2647                 :            :                             struct cgroup *dst_cgrp,
    2648                 :            :                             struct cgroup_mgctx *mgctx)
    2649                 :            : {
    2650                 :            :         struct cgroup *src_cgrp;
    2651                 :            : 
    2652                 :            :         lockdep_assert_held(&cgroup_mutex);
    2653                 :            :         lockdep_assert_held(&css_set_lock);
    2654                 :            : 
    2655                 :            :         /*
    2656                 :            :          * If ->dead, @src_cset is associated with one or more dead cgroups
    2657                 :            :          * and doesn't contain any migratable tasks.  Ignore it early so
    2658                 :            :          * that the rest of migration path doesn't get confused by it.
    2659                 :            :          */
    2660                 :          3 :         if (src_cset->dead)
    2661                 :            :                 return;
    2662                 :            : 
    2663                 :          3 :         src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
    2664                 :            : 
    2665                 :          3 :         if (!list_empty(&src_cset->mg_preload_node))
    2666                 :            :                 return;
    2667                 :            : 
    2668                 :          3 :         WARN_ON(src_cset->mg_src_cgrp);
    2669                 :          3 :         WARN_ON(src_cset->mg_dst_cgrp);
    2670                 :          3 :         WARN_ON(!list_empty(&src_cset->mg_tasks));
    2671                 :          3 :         WARN_ON(!list_empty(&src_cset->mg_node));
    2672                 :            : 
    2673                 :          3 :         src_cset->mg_src_cgrp = src_cgrp;
    2674                 :          3 :         src_cset->mg_dst_cgrp = dst_cgrp;
    2675                 :            :         get_css_set(src_cset);
    2676                 :          3 :         list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
    2677                 :            : }
    2678                 :            : 
    2679                 :            : /**
    2680                 :            :  * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
    2681                 :            :  * @mgctx: migration context
    2682                 :            :  *
    2683                 :            :  * Tasks are about to be moved and all the source css_sets have been
    2684                 :            :  * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
    2685                 :            :  * pins all destination css_sets, links each to its source, and appends them
    2686                 :            :  * to @mgctx->preloaded_dst_csets.
    2687                 :            :  *
    2688                 :            :  * This function must be called after cgroup_migrate_add_src() has been
    2689                 :            :  * called on each migration source css_set.  After migration is performed
    2690                 :            :  * using cgroup_migrate(), cgroup_migrate_finish() must be called on
    2691                 :            :  * @mgctx.
    2692                 :            :  */
    2693                 :          3 : int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
    2694                 :            : {
    2695                 :            :         struct css_set *src_cset, *tmp_cset;
    2696                 :            : 
    2697                 :            :         lockdep_assert_held(&cgroup_mutex);
    2698                 :            : 
    2699                 :            :         /* look up the dst cset for each src cset and link it to src */
    2700                 :          3 :         list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
    2701                 :            :                                  mg_preload_node) {
    2702                 :            :                 struct css_set *dst_cset;
    2703                 :            :                 struct cgroup_subsys *ss;
    2704                 :            :                 int ssid;
    2705                 :            : 
    2706                 :          3 :                 dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
    2707                 :          3 :                 if (!dst_cset)
    2708                 :            :                         return -ENOMEM;
    2709                 :            : 
    2710                 :          3 :                 WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
    2711                 :            : 
    2712                 :            :                 /*
    2713                 :            :                  * If src cset equals dst, it's noop.  Drop the src.
    2714                 :            :                  * cgroup_migrate() will skip the cset too.  Note that we
    2715                 :            :                  * can't handle src == dst as some nodes are used by both.
    2716                 :            :                  */
    2717                 :          3 :                 if (src_cset == dst_cset) {
    2718                 :          3 :                         src_cset->mg_src_cgrp = NULL;
    2719                 :          3 :                         src_cset->mg_dst_cgrp = NULL;
    2720                 :            :                         list_del_init(&src_cset->mg_preload_node);
    2721                 :          3 :                         put_css_set(src_cset);
    2722                 :          3 :                         put_css_set(dst_cset);
    2723                 :          3 :                         continue;
    2724                 :            :                 }
    2725                 :            : 
    2726                 :          3 :                 src_cset->mg_dst_cset = dst_cset;
    2727                 :            : 
    2728                 :          3 :                 if (list_empty(&dst_cset->mg_preload_node))
    2729                 :          3 :                         list_add_tail(&dst_cset->mg_preload_node,
    2730                 :            :                                       &mgctx->preloaded_dst_csets);
    2731                 :            :                 else
    2732                 :          0 :                         put_css_set(dst_cset);
    2733                 :            : 
    2734                 :          3 :                 for_each_subsys(ss, ssid)
    2735                 :          3 :                         if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
    2736                 :          3 :                                 mgctx->ss_mask |= 1 << ssid;
    2737                 :            :         }
    2738                 :            : 
    2739                 :            :         return 0;
    2740                 :            : }
    2741                 :            : 
    2742                 :            : /**
    2743                 :            :  * cgroup_migrate - migrate a process or task to a cgroup
    2744                 :            :  * @leader: the leader of the process or the task to migrate
    2745                 :            :  * @threadgroup: whether @leader points to the whole process or a single task
    2746                 :            :  * @mgctx: migration context
    2747                 :            :  *
    2748                 :            :  * Migrate a process or task denoted by @leader.  If migrating a process,
    2749                 :            :  * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
    2750                 :            :  * responsible for invoking cgroup_migrate_add_src() and
    2751                 :            :  * cgroup_migrate_prepare_dst() on the targets before invoking this
    2752                 :            :  * function and following up with cgroup_migrate_finish().
    2753                 :            :  *
    2754                 :            :  * As long as a controller's ->can_attach() doesn't fail, this function is
    2755                 :            :  * guaranteed to succeed.  This means that, excluding ->can_attach()
    2756                 :            :  * failure, when migrating multiple targets, the success or failure can be
    2757                 :            :  * decided for all targets by invoking cgroup_migrate_prepare_dst() before
    2758                 :            :  * actually starting migrating.
    2759                 :            :  */
    2760                 :          3 : int cgroup_migrate(struct task_struct *leader, bool threadgroup,
    2761                 :            :                    struct cgroup_mgctx *mgctx)
    2762                 :            : {
    2763                 :            :         struct task_struct *task;
    2764                 :            : 
    2765                 :            :         /*
    2766                 :            :          * Prevent freeing of tasks while we take a snapshot. Tasks that are
    2767                 :            :          * already PF_EXITING could be freed from underneath us unless we
    2768                 :            :          * take an rcu_read_lock.
    2769                 :            :          */
    2770                 :            :         spin_lock_irq(&css_set_lock);
    2771                 :            :         rcu_read_lock();
    2772                 :            :         task = leader;
    2773                 :            :         do {
    2774                 :          3 :                 cgroup_migrate_add_task(task, mgctx);
    2775                 :          3 :                 if (!threadgroup)
    2776                 :            :                         break;
    2777                 :          3 :         } while_each_thread(leader, task);
    2778                 :            :         rcu_read_unlock();
    2779                 :            :         spin_unlock_irq(&css_set_lock);
    2780                 :            : 
    2781                 :          3 :         return cgroup_migrate_execute(mgctx);
    2782                 :            : }
    2783                 :            : 
    2784                 :            : /**
    2785                 :            :  * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
    2786                 :            :  * @dst_cgrp: the cgroup to attach to
    2787                 :            :  * @leader: the task or the leader of the threadgroup to be attached
    2788                 :            :  * @threadgroup: attach the whole threadgroup?
    2789                 :            :  *
    2790                 :            :  * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
    2791                 :            :  */
    2792                 :          3 : int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
    2793                 :            :                        bool threadgroup)
    2794                 :            : {
    2795                 :          3 :         DEFINE_CGROUP_MGCTX(mgctx);
    2796                 :            :         struct task_struct *task;
    2797                 :            :         int ret;
    2798                 :            : 
    2799                 :          3 :         ret = cgroup_migrate_vet_dst(dst_cgrp);
    2800                 :          3 :         if (ret)
    2801                 :            :                 return ret;
    2802                 :            : 
    2803                 :            :         /* look up all src csets */
    2804                 :            :         spin_lock_irq(&css_set_lock);
    2805                 :            :         rcu_read_lock();
    2806                 :            :         task = leader;
    2807                 :            :         do {
    2808                 :          3 :                 cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
    2809                 :          3 :                 if (!threadgroup)
    2810                 :            :                         break;
    2811                 :          3 :         } while_each_thread(leader, task);
    2812                 :            :         rcu_read_unlock();
    2813                 :            :         spin_unlock_irq(&css_set_lock);
    2814                 :            : 
    2815                 :            :         /* prepare dst csets and commit */
    2816                 :          3 :         ret = cgroup_migrate_prepare_dst(&mgctx);
    2817                 :          3 :         if (!ret)
    2818                 :          3 :                 ret = cgroup_migrate(leader, threadgroup, &mgctx);
    2819                 :            : 
    2820                 :          3 :         cgroup_migrate_finish(&mgctx);
    2821                 :            : 
    2822                 :          3 :         if (!ret)
    2823                 :          3 :                 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
    2824                 :            : 
    2825                 :          3 :         return ret;
    2826                 :            : }
    2827                 :            : 
    2828                 :          3 : struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
    2829                 :            :         __acquires(&cgroup_threadgroup_rwsem)
    2830                 :            : {
    2831                 :            :         struct task_struct *tsk;
    2832                 :            :         pid_t pid;
    2833                 :            : 
    2834                 :          3 :         if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
    2835                 :            :                 return ERR_PTR(-EINVAL);
    2836                 :            : 
    2837                 :          3 :         percpu_down_write(&cgroup_threadgroup_rwsem);
    2838                 :            : 
    2839                 :            :         rcu_read_lock();
    2840                 :          3 :         if (pid) {
    2841                 :          3 :                 tsk = find_task_by_vpid(pid);
    2842                 :          3 :                 if (!tsk) {
    2843                 :            :                         tsk = ERR_PTR(-ESRCH);
    2844                 :            :                         goto out_unlock_threadgroup;
    2845                 :            :                 }
    2846                 :            :         } else {
    2847                 :          0 :                 tsk = current;
    2848                 :            :         }
    2849                 :            : 
    2850                 :          3 :         if (threadgroup)
    2851                 :          3 :                 tsk = tsk->group_leader;
    2852                 :            : 
    2853                 :            :         /*
    2854                 :            :          * kthreads may acquire PF_NO_SETAFFINITY during initialization.
    2855                 :            :          * If userland migrates such a kthread to a non-root cgroup, it can
    2856                 :            :          * become trapped in a cpuset, or RT kthread may be born in a
    2857                 :            :          * cgroup with no rt_runtime allocated.  Just say no.
    2858                 :            :          */
    2859                 :          3 :         if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
    2860                 :            :                 tsk = ERR_PTR(-EINVAL);
    2861                 :            :                 goto out_unlock_threadgroup;
    2862                 :            :         }
    2863                 :            : 
    2864                 :            :         get_task_struct(tsk);
    2865                 :            :         goto out_unlock_rcu;
    2866                 :            : 
    2867                 :            : out_unlock_threadgroup:
    2868                 :          0 :         percpu_up_write(&cgroup_threadgroup_rwsem);
    2869                 :            : out_unlock_rcu:
    2870                 :            :         rcu_read_unlock();
    2871                 :          3 :         return tsk;
    2872                 :            : }
    2873                 :            : 
    2874                 :          3 : void cgroup_procs_write_finish(struct task_struct *task)
    2875                 :            :         __releases(&cgroup_threadgroup_rwsem)
    2876                 :            : {
    2877                 :            :         struct cgroup_subsys *ss;
    2878                 :            :         int ssid;
    2879                 :            : 
    2880                 :            :         /* release reference from cgroup_procs_write_start() */
    2881                 :          3 :         put_task_struct(task);
    2882                 :            : 
    2883                 :          3 :         percpu_up_write(&cgroup_threadgroup_rwsem);
    2884                 :          3 :         for_each_subsys(ss, ssid)
    2885                 :          3 :                 if (ss->post_attach)
    2886                 :          3 :                         ss->post_attach();
    2887                 :          3 : }
    2888                 :            : 
    2889                 :          0 : static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
    2890                 :            : {
    2891                 :            :         struct cgroup_subsys *ss;
    2892                 :            :         bool printed = false;
    2893                 :            :         int ssid;
    2894                 :            : 
    2895                 :          0 :         do_each_subsys_mask(ss, ssid, ss_mask) {
    2896                 :          0 :                 if (printed)
    2897                 :          0 :                         seq_putc(seq, ' ');
    2898                 :          0 :                 seq_puts(seq, ss->name);
    2899                 :            :                 printed = true;
    2900                 :            :         } while_each_subsys_mask();
    2901                 :          0 :         if (printed)
    2902                 :          0 :                 seq_putc(seq, '\n');
    2903                 :          0 : }
    2904                 :            : 
    2905                 :            : /* show controllers which are enabled from the parent */
    2906                 :          0 : static int cgroup_controllers_show(struct seq_file *seq, void *v)
    2907                 :            : {
    2908                 :          0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    2909                 :            : 
    2910                 :          0 :         cgroup_print_ss_mask(seq, cgroup_control(cgrp));
    2911                 :          0 :         return 0;
    2912                 :            : }
    2913                 :            : 
    2914                 :            : /* show controllers which are enabled for a given cgroup's children */
    2915                 :          0 : static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
    2916                 :            : {
    2917                 :          0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    2918                 :            : 
    2919                 :          0 :         cgroup_print_ss_mask(seq, cgrp->subtree_control);
    2920                 :          0 :         return 0;
    2921                 :            : }
    2922                 :            : 
    2923                 :            : /**
    2924                 :            :  * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
    2925                 :            :  * @cgrp: root of the subtree to update csses for
    2926                 :            :  *
    2927                 :            :  * @cgrp's control masks have changed and its subtree's css associations
    2928                 :            :  * need to be updated accordingly.  This function looks up all css_sets
    2929                 :            :  * which are attached to the subtree, creates the matching updated css_sets
    2930                 :            :  * and migrates the tasks to the new ones.
    2931                 :            :  */
    2932                 :          3 : static int cgroup_update_dfl_csses(struct cgroup *cgrp)
    2933                 :            : {
    2934                 :          3 :         DEFINE_CGROUP_MGCTX(mgctx);
    2935                 :            :         struct cgroup_subsys_state *d_css;
    2936                 :            :         struct cgroup *dsct;
    2937                 :            :         struct css_set *src_cset;
    2938                 :            :         int ret;
    2939                 :            : 
    2940                 :            :         lockdep_assert_held(&cgroup_mutex);
    2941                 :            : 
    2942                 :          3 :         percpu_down_write(&cgroup_threadgroup_rwsem);
    2943                 :            : 
    2944                 :            :         /* look up all csses currently attached to @cgrp's subtree */
    2945                 :            :         spin_lock_irq(&css_set_lock);
    2946                 :          3 :         cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
    2947                 :            :                 struct cgrp_cset_link *link;
    2948                 :            : 
    2949                 :          3 :                 list_for_each_entry(link, &dsct->cset_links, cset_link)
    2950                 :          3 :                         cgroup_migrate_add_src(link->cset, dsct, &mgctx);
    2951                 :            :         }
    2952                 :            :         spin_unlock_irq(&css_set_lock);
    2953                 :            : 
    2954                 :            :         /* dst of each src cset is its own cgroup; look up the dst csets */
    2955                 :          3 :         ret = cgroup_migrate_prepare_dst(&mgctx);
    2956                 :          3 :         if (ret)
    2957                 :            :                 goto out_finish;
    2958                 :            : 
    2959                 :            :         spin_lock_irq(&css_set_lock);
    2960                 :          3 :         list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
    2961                 :            :                 struct task_struct *task, *ntask;
    2962                 :            : 
    2963                 :            :                 /* all tasks in src_csets need to be migrated */
    2964                 :          0 :                 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
    2965                 :          0 :                         cgroup_migrate_add_task(task, &mgctx);
    2966                 :            :         }
    2967                 :            :         spin_unlock_irq(&css_set_lock);
    2968                 :            : 
    2969                 :          3 :         ret = cgroup_migrate_execute(&mgctx);
    2970                 :            : out_finish:
    2971                 :          3 :         cgroup_migrate_finish(&mgctx);
    2972                 :          3 :         percpu_up_write(&cgroup_threadgroup_rwsem);
    2973                 :          3 :         return ret;
    2974                 :            : }
    2975                 :            : 
    2976                 :            : /**
    2977                 :            :  * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
    2978                 :            :  * @cgrp: root of the target subtree
    2979                 :            :  *
    2980                 :            :  * Because css offlining is asynchronous, userland may try to re-enable a
    2981                 :            :  * controller while the previous css is still around.  This function grabs
    2982                 :            :  * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
    2983                 :            :  */
    2984                 :          3 : void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
    2985                 :            :         __acquires(&cgroup_mutex)
    2986                 :            : {
    2987                 :            :         struct cgroup *dsct;
    2988                 :            :         struct cgroup_subsys_state *d_css;
    2989                 :            :         struct cgroup_subsys *ss;
    2990                 :            :         int ssid;
    2991                 :            : 
    2992                 :            : restart:
    2993                 :          3 :         mutex_lock(&cgroup_mutex);
    2994                 :            : 
    2995                 :          3 :         cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
    2996                 :          3 :                 for_each_subsys(ss, ssid) {
    2997                 :            :                         struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
    2998                 :          3 :                         DEFINE_WAIT(wait);
    2999                 :            : 
    3000                 :          3 :                         if (!css || !percpu_ref_is_dying(&css->refcnt))
    3001                 :          3 :                                 continue;
    3002                 :            : 
    3003                 :          0 :                         cgroup_get_live(dsct);
    3004                 :          0 :                         prepare_to_wait(&dsct->offline_waitq, &wait,
    3005                 :            :                                         TASK_UNINTERRUPTIBLE);
    3006                 :            : 
    3007                 :          0 :                         mutex_unlock(&cgroup_mutex);
    3008                 :          0 :                         schedule();
    3009                 :          0 :                         finish_wait(&dsct->offline_waitq, &wait);
    3010                 :            : 
    3011                 :            :                         cgroup_put(dsct);
    3012                 :          0 :                         goto restart;
    3013                 :            :                 }
    3014                 :            :         }
    3015                 :          3 : }
    3016                 :            : 
    3017                 :            : /**
    3018                 :            :  * cgroup_save_control - save control masks and dom_cgrp of a subtree
    3019                 :            :  * @cgrp: root of the target subtree
    3020                 :            :  *
    3021                 :            :  * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
    3022                 :            :  * respective old_ prefixed fields for @cgrp's subtree including @cgrp
    3023                 :            :  * itself.
                         :            :  *
                         :            :  * The old_ fields written here are the ones cgroup_restore_control()
                         :            :  * copies back, so this is the "stash" half of a save/restore pair.
                         :            :  * The subtree is walked with cgroup_for_each_live_descendant_pre().
    3024                 :            :  */
    3025                 :          0 : static void cgroup_save_control(struct cgroup *cgrp)
    3026                 :            : {
    3027                 :            :         struct cgroup *dsct;
    3028                 :            :         struct cgroup_subsys_state *d_css;
    3029                 :            : 
    3030                 :          0 :         cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
    3031                 :          0 :                 dsct->old_subtree_control = dsct->subtree_control;
    3032                 :          0 :                 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
    3033                 :          0 :                 dsct->old_dom_cgrp = dsct->dom_cgrp;
    3034                 :            :         }
    3035                 :          0 : }
    3036                 :            : 
    3037                 :            : /**
    3038                 :            :  * cgroup_propagate_control - refresh control masks of a subtree
    3039                 :            :  * @cgrp: root of the target subtree
    3040                 :            :  *
    3041                 :            :  * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
    3042                 :            :  * ->subtree_control and propagate controller availability through the
    3043                 :            :  * subtree so that descendants don't have unavailable controllers enabled.
                         :            :  *
                         :            :  * For each visited cgroup, ->subtree_control is first masked with
                         :            :  * cgroup_control() and ->subtree_ss_mask is then recomputed from the
                         :            :  * masked value and cgroup_ss_mask().
                         :            :  *
                         :            :  * NOTE(review): the walk is pre-order, presumably so that a parent's
                         :            :  * masks are settled before its children are recomputed from them;
                         :            :  * cgroup_control() and cgroup_ss_mask() are defined outside this view,
                         :            :  * confirm there.
    3044                 :            :  */
    3045                 :          3 : static void cgroup_propagate_control(struct cgroup *cgrp)
    3046                 :            : {
    3047                 :            :         struct cgroup *dsct;
    3048                 :            :         struct cgroup_subsys_state *d_css;
    3049                 :            : 
    3050                 :          3 :         cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
    3051                 :          3 :                 dsct->subtree_control &= cgroup_control(dsct);
    3052                 :          3 :                 dsct->subtree_ss_mask =
    3053                 :          3 :                         cgroup_calc_subtree_ss_mask(dsct->subtree_control,
    3054                 :            :                                                     cgroup_ss_mask(dsct));
    3055                 :            :         }
    3056                 :          3 : }
    3057                 :            : 
    3058                 :            : /**
    3059                 :            :  * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
    3060                 :            :  * @cgrp: root of the target subtree
    3061                 :            :  *
    3062                 :            :  * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
    3063                 :            :  * respective old_ prefixed fields for @cgrp's subtree including @cgrp
    3064                 :            :  * itself.
                         :            :  *
                         :            :  * This undoes what was changed since cgroup_save_control() filled the
                         :            :  * old_ fields.  Unlike the save side, the subtree is walked with
                         :            :  * cgroup_for_each_live_descendant_post().
    3065                 :            :  */
    3066                 :          0 : static void cgroup_restore_control(struct cgroup *cgrp)
    3067                 :            : {
    3068                 :            :         struct cgroup *dsct;
    3069                 :            :         struct cgroup_subsys_state *d_css;
    3070                 :            : 
    3071                 :          0 :         cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
    3072                 :          0 :                 dsct->subtree_control = dsct->old_subtree_control;
    3073                 :          0 :                 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
    3074                 :          0 :                 dsct->dom_cgrp = dsct->old_dom_cgrp;
    3075                 :            :         }
    3076                 :          0 : }
    3077                 :            : /**
                         :            :  * css_visible - test whether @css should be exposed in its cgroup
                         :            :  * @css: css to test
                         :            :  *
                         :            :  * Returns %true when the bit for @css->ss is set in cgroup_control() of
                         :            :  * @css->cgroup.  Returns %false when the bit is not even set in
                         :            :  * cgroup_ss_mask().  In the remaining case (set in cgroup_ss_mask()
                         :            :  * only), returns %true only if the cgroup is on the default hierarchy
                         :            :  * and the subsystem has ->implicit_on_dfl set.
                         :            :  *
                         :            :  * In this file the result selects between css_populate_dir() in
                         :            :  * cgroup_apply_control_enable() and css_clear_dir() in
                         :            :  * cgroup_apply_control_disable().
                         :            :  */
    3078                 :          3 : static bool css_visible(struct cgroup_subsys_state *css)
    3079                 :            : {
    3080                 :          3 :         struct cgroup_subsys *ss = css->ss;
    3081                 :          3 :         struct cgroup *cgrp = css->cgroup;
    3082                 :            : 
    3083                 :          3 :         if (cgroup_control(cgrp) & (1 << ss->id))
    3084                 :            :                 return true;
    3085                 :          3 :         if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
    3086                 :            :                 return false;
    3087                 :          3 :         return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
    3088                 :            : }
    3089                 :            : 
    3090                 :            : /**
    3091                 :            :  * cgroup_apply_control_enable - enable or show csses according to control
    3092                 :            :  * @cgrp: root of the target subtree
    3093                 :            :  *
    3094                 :            :  * Walk @cgrp's subtree and create new csses or make the existing ones
    3095                 :            :  * visible.  A css is created invisible if it's being implicitly enabled
    3096                 :            :  * through dependency.  An invisible css is made visible when the userland
    3097                 :            :  * explicitly enables it.
    3098                 :            :  *
                         :            :  * For every visited cgroup, subsystems whose bit is clear in
                         :            :  * cgroup_ss_mask() are skipped.  For the others, a missing css is
                         :            :  * created with css_create() and, if css_visible() says so, its
                         :            :  * directory is populated with css_populate_dir().
                         :            :  *
    3099                 :            :  * Returns 0 on success, -errno on failure.  On failure, csses which have
    3100                 :            :  * been processed already aren't cleaned up.  The caller is responsible for
    3101                 :            :  * cleaning up with cgroup_apply_control_disable().
    3102                 :            :  */
    3103                 :          3 : static int cgroup_apply_control_enable(struct cgroup *cgrp)
    3104                 :            : {
    3105                 :            :         struct cgroup *dsct;
    3106                 :            :         struct cgroup_subsys_state *d_css;
    3107                 :            :         struct cgroup_subsys *ss;
    3108                 :            :         int ssid, ret;
    3109                 :            : 
    3110                 :          3 :         cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
    3111                 :          3 :                 for_each_subsys(ss, ssid) {
    3112                 :            :                         struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
    3113                 :            : 
    3114                 :          3 :                         if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
    3115                 :          3 :                                 continue;       /* @ss not in @dsct's ss_mask */
    3116                 :            : 
    3117                 :          3 :                         if (!css) {
    3118                 :          3 :                                 css = css_create(dsct, ss);
    3119                 :          3 :                                 if (IS_ERR(css))
    3120                 :          0 :                                         return PTR_ERR(css);
    3121                 :            :                         }
    3122                 :            : 
    3123                 :          3 :                         WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
    3124                 :            : 
    3125                 :          3 :                         if (css_visible(css)) {
    3126                 :          3 :                                 ret = css_populate_dir(css);
    3127                 :          3 :                                 if (ret)
    3128                 :          0 :                                         return ret;
    3129                 :            :                         }
    3130                 :            :                 }
    3131                 :            :         }
    3132                 :            : 
    3133                 :            :         return 0;
    3134                 :            : }
    3135                 :            : 
    3136                 :            : /**
    3137                 :            :  * cgroup_apply_control_disable - kill or hide csses according to control
    3138                 :            :  * @cgrp: root of the target subtree
    3139                 :            :  *
    3140                 :            :  * Walk @cgrp's subtree and kill and hide csses so that they match
    3141                 :            :  * cgroup_ss_mask() and css_visible().
    3142                 :            :  *
    3143                 :            :  * A css is hidden when the userland requests it to be disabled while other
    3144                 :            :  * subsystems are still depending on it.  The css must not actively control
    3145                 :            :  * resources and be in the vanilla state if it's made visible again later.
    3146                 :            :  * Controllers which may be depended upon should provide ->css_reset() for
    3147                 :            :  * this purpose.
                         :            :  *
                         :            :  * A css is killed with kill_css() only if it has a ->parent and its
                         :            :  * subsystem bit is clear in cgroup_ss_mask() of the cgroup.  Any other
                         :            :  * existing css which css_visible() rejects is hidden instead: its
                         :            :  * directory is cleared with css_clear_dir() and ->css_reset() is
                         :            :  * invoked when the subsystem provides one.
    3148                 :            :  */
    3149                 :          3 : static void cgroup_apply_control_disable(struct cgroup *cgrp)
    3150                 :            : {
    3151                 :            :         struct cgroup *dsct;
    3152                 :            :         struct cgroup_subsys_state *d_css;
    3153                 :            :         struct cgroup_subsys *ss;
    3154                 :            :         int ssid;
    3155                 :            : 
    3156                 :          3 :         cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
    3157                 :          3 :                 for_each_subsys(ss, ssid) {
    3158                 :            :                         struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
    3159                 :            : 
    3160                 :          3 :                         if (!css)
    3161                 :          3 :                                 continue;       /* nothing to kill or hide */
    3162                 :            : 
    3163                 :          3 :                         WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
    3164                 :            : 
    3165                 :          3 :                         if (css->parent &&
    3166                 :          0 :                             !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
    3167                 :          0 :                                 kill_css(css);
    3168                 :          3 :                         } else if (!css_visible(css)) {
    3169                 :          3 :                                 css_clear_dir(css);
    3170                 :          3 :                                 if (ss->css_reset)
    3171                 :          3 :                                         ss->css_reset(css);
    3172                 :            :                         }
    3173                 :            :                 }
    3174                 :            :         }
    3175                 :          3 : }
    3176                 :            : 
    3177                 :            : /**
    3178                 :            :  * cgroup_apply_control - apply control mask updates to the subtree
    3179                 :            :  * @cgrp: root of the target subtree
    3180                 :            :  *
    3181                 :            :  * Subsystems can be enabled and disabled in a subtree using the following
    3182                 :            :  * steps.
    3183                 :            :  *
    3184                 :            :  * 1. Call cgroup_save_control() to stash the current state.
    3185                 :            :  * 2. Update ->subtree_control masks in the subtree as desired.
    3186                 :            :  * 3. Call cgroup_apply_control() to apply the changes.
    3187                 :            :  * 4. Optionally perform other related operations.
    3188                 :            :  * 5. Call cgroup_finalize_control() to finish up.
    3189                 :            :  *
    3190                 :            :  * This function implements step 3 and propagates the mask changes
    3191                 :            :  * throughout @cgrp's subtree, updates csses accordingly and performs
    3192                 :            :  * process migrations.
                         :            :  *
                         :            :  * Returns 0 on success, or the first non-zero result of
                         :            :  * cgroup_apply_control_enable() or cgroup_update_dfl_csses().  Nothing
                         :            :  * is rolled back here on failure; the callers in this file hand the
                         :            :  * result to cgroup_finalize_control().
    3193                 :            :  */
    3194                 :          3 : static int cgroup_apply_control(struct cgroup *cgrp)
    3195                 :            : {
    3196                 :            :         int ret;
    3197                 :            : 
    3198                 :          3 :         cgroup_propagate_control(cgrp);
    3199                 :            : 
    3200                 :          3 :         ret = cgroup_apply_control_enable(cgrp);
    3201                 :          3 :         if (ret)
    3202                 :            :                 return ret;
    3203                 :            : 
    3204                 :            :         /*
    3205                 :            :          * At this point, cgroup_e_css_by_mask() results reflect the new csses
    3206                 :            :          * making the following cgroup_update_dfl_csses() properly update
    3207                 :            :          * css associations of all tasks in the subtree.
    3208                 :            :          */
    3209                 :          3 :         ret = cgroup_update_dfl_csses(cgrp);
    3210                 :          3 :         if (ret)
    3211                 :          0 :                 return ret;
    3212                 :            : 
    3213                 :            :         return 0;
    3214                 :            : }
    3215                 :            : 
    3216                 :            : /**
    3217                 :            :  * cgroup_finalize_control - finalize control mask update
    3218                 :            :  * @cgrp: root of the target subtree
    3219                 :            :  * @ret: the result of the update
    3220                 :            :  *
    3221                 :            :  * Finalize control mask update.  See cgroup_apply_control() for more info.
                         :            :  *
                         :            :  * If @ret is non-zero, the masks stashed by cgroup_save_control() are
                         :            :  * put back with cgroup_restore_control() and propagated again.  In
                         :            :  * both the success and the failure case, cgroup_apply_control_disable()
                         :            :  * is then run on @cgrp.
    3222                 :            :  */
    3223                 :          3 : static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
    3224                 :            : {
    3225                 :          3 :         if (ret) {
    3226                 :          0 :                 cgroup_restore_control(cgrp);
    3227                 :          0 :                 cgroup_propagate_control(cgrp);
    3228                 :            :         }
    3229                 :            : 
    3230                 :          3 :         cgroup_apply_control_disable(cgrp);
    3231                 :          3 : }
    3232                 :            : /**
                         :            :  * cgroup_vet_subtree_control_enable - check whether controllers may be enabled
                         :            :  * @cgrp: cgroup whose ->subtree_control is about to change
                         :            :  * @enable: bitmask of the subsystems being enabled
                         :            :  *
                         :            :  * The checks run in this order:
                         :            :  *
                         :            :  * - empty @enable: allowed.
                         :            :  * - @cgrp->dom_cgrp is not a valid domain: -EOPNOTSUPP.
                         :            :  * - @cgrp is mixable: allowed.
                         :            :  * - @enable has bits outside cgrp_dfl_threaded_ss_mask and @cgrp is a
                         :            :  *   thread root or threaded: -EOPNOTSUPP.
                         :            :  * - @enable has only bits inside cgrp_dfl_threaded_ss_mask and @cgrp
                         :            :  *   can be a thread root or is threaded: allowed.
                         :            :  * - @cgrp has tasks: -EBUSY.
                         :            :  *
                         :            :  * Returns 0 if the enable is allowed, -errno as listed above otherwise.
                         :            :  */
    3233                 :          0 : static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
    3234                 :            : {
    3235                 :          0 :         u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
    3236                 :            : 
    3237                 :            :         /* if nothing is getting enabled, nothing to worry about */
    3238                 :          0 :         if (!enable)
    3239                 :            :                 return 0;
    3240                 :            : 
    3241                 :            :         /* can @cgrp host any resources? */
    3242                 :          0 :         if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
    3243                 :            :                 return -EOPNOTSUPP;
    3244                 :            : 
    3245                 :            :         /* mixables don't care */
    3246                 :          0 :         if (cgroup_is_mixable(cgrp))
    3247                 :            :                 return 0;
    3248                 :            : 
    3249                 :          0 :         if (domain_enable) {
    3250                 :            :                 /* can't enable domain controllers inside a thread subtree */
    3251                 :          0 :                 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
    3252                 :            :                         return -EOPNOTSUPP;
    3253                 :            :         } else {
    3254                 :            :                 /*
    3255                 :            :                  * Threaded controllers can handle internal competitions
    3256                 :            :                  * and are always allowed inside a (prospective) thread
    3257                 :            :                  * subtree.
    3258                 :            :                  */
    3259                 :          0 :                 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
    3260                 :            :                         return 0;
    3261                 :            :         }
    3262                 :            : 
    3263                 :            :         /*
    3264                 :            :          * Controllers can't be enabled for a cgroup with tasks to avoid
    3265                 :            :          * child cgroups competing against tasks.
    3266                 :            :          */
    3267                 :          0 :         if (cgroup_has_tasks(cgrp))
    3268                 :            :                 return -EBUSY;
    3269                 :            : 
    3270                 :          0 :         return 0;
    3271                 :            : }
    3272                 :            : 
    3273                 :            : /*
                         :            :  * Change the enabled child controllers for a cgroup in the default
                         :            :  * hierarchy.
                         :            :  *
                         :            :  * @buf is parsed as a space separated list of "+<name>" / "-<name>"
                         :            :  * tokens; a later token for the same subsystem overrides an earlier
                         :            :  * one.  Requests which wouldn't change ->subtree_control are dropped
                         :            :  * and, if nothing is left, the write succeeds without doing anything.
                         :            :  *
                         :            :  * Returns @nbytes on success.  Error returns visible here:
                         :            :  *   -EINVAL  a token has no +/- prefix or names no matching subsystem
                         :            :  *   -ENODEV  cgroup_kn_lock_live() failed
                         :            :  *   -ENOENT  a controller to enable is not in cgroup_control() of the
                         :            :  *            cgroup
                         :            :  *   -EBUSY   a controller to disable is still in a live child's
                         :            :  *            ->subtree_control
                         :            :  * plus whatever cgroup_vet_subtree_control_enable() or
                         :            :  * cgroup_apply_control() return.
                         :            :  */
    3274                 :          0 : static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
    3275                 :            :                                             char *buf, size_t nbytes,
    3276                 :            :                                             loff_t off)
    3277                 :            : {
    3278                 :            :         u16 enable = 0, disable = 0;
    3279                 :            :         struct cgroup *cgrp, *child;
    3280                 :            :         struct cgroup_subsys *ss;
    3281                 :            :         char *tok;
    3282                 :            :         int ssid, ret;
    3283                 :            : 
    3284                 :            :         /*
    3285                 :            :          * Parse input - space separated list of subsystem names prefixed
    3286                 :            :          * with either + or -.
    3287                 :            :          */
    3288                 :          0 :         buf = strstrip(buf);
    3289                 :          0 :         while ((tok = strsep(&buf, " "))) {
    3290                 :          0 :                 if (tok[0] == '\0')
    3291                 :          0 :                         continue;       /* skip empty tokens */
    3292                 :          0 :                 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
    3293                 :          0 :                         if (!cgroup_ssid_enabled(ssid) ||
    3294                 :          0 :                             strcmp(tok + 1, ss->name))
    3295                 :          0 :                                 continue;
    3296                 :            : 
    3297                 :          0 :                         if (*tok == '+') {
    3298                 :          0 :                                 enable |= 1 << ssid;
    3299                 :          0 :                                 disable &= ~(1 << ssid);
    3300                 :          0 :                         } else if (*tok == '-') {
    3301                 :          0 :                                 disable |= 1 << ssid;
    3302                 :          0 :                                 enable &= ~(1 << ssid);
    3303                 :            :                         } else {
    3304                 :          0 :                                 return -EINVAL;
    3305                 :            :                         }
    3306                 :            :                         break;
    3307                 :            :                 } while_each_subsys_mask();
    3308                 :          0 :                 if (ssid == CGROUP_SUBSYS_COUNT)        /* no subsystem matched @tok */
    3309                 :            :                         return -EINVAL;
    3310                 :            :         }
    3311                 :            : 
    3312                 :          0 :         cgrp = cgroup_kn_lock_live(of->kn, true);
    3313                 :          0 :         if (!cgrp)
    3314                 :            :                 return -ENODEV;
    3315                 :            : 
    3316                 :          0 :         for_each_subsys(ss, ssid) {
    3317                 :          0 :                 if (enable & (1 << ssid)) {
    3318                 :          0 :                         if (cgrp->subtree_control & (1 << ssid)) {
    3319                 :          0 :                                 enable &= ~(1 << ssid); /* already enabled */
    3320                 :          0 :                                 continue;
    3321                 :            :                         }
    3322                 :            : 
    3323                 :          0 :                         if (!(cgroup_control(cgrp) & (1 << ssid))) {
    3324                 :            :                                 ret = -ENOENT;
    3325                 :            :                                 goto out_unlock;
    3326                 :            :                         }
    3327                 :          0 :                 } else if (disable & (1 << ssid)) {
    3328                 :          0 :                         if (!(cgrp->subtree_control & (1 << ssid))) {
    3329                 :          0 :                                 disable &= ~(1 << ssid);        /* already disabled */
    3330                 :          0 :                                 continue;
    3331                 :            :                         }
    3332                 :            : 
    3333                 :            :                         /* a child has it enabled? */
    3334                 :          0 :                         cgroup_for_each_live_child(child, cgrp) {
    3335                 :          0 :                                 if (child->subtree_control & (1 << ssid)) {
    3336                 :            :                                         ret = -EBUSY;
    3337                 :            :                                         goto out_unlock;
    3338                 :            :                                 }
    3339                 :            :                         }
    3340                 :            :                 }
    3341                 :            :         }
    3342                 :            : 
    3343                 :          0 :         if (!enable && !disable) {
    3344                 :            :                 ret = 0;
    3345                 :            :                 goto out_unlock;
    3346                 :            :         }
    3347                 :            : 
    3348                 :          0 :         ret = cgroup_vet_subtree_control_enable(cgrp, enable);
    3349                 :          0 :         if (ret)
    3350                 :            :                 goto out_unlock;
    3351                 :            : 
    3352                 :            :         /* save and update control masks and prepare csses */
    3353                 :          0 :         cgroup_save_control(cgrp);
    3354                 :            : 
    3355                 :          0 :         cgrp->subtree_control |= enable;
    3356                 :          0 :         cgrp->subtree_control &= ~disable;
    3357                 :            : 
    3358                 :          0 :         ret = cgroup_apply_control(cgrp);
    3359                 :          0 :         cgroup_finalize_control(cgrp, ret);
    3360                 :          0 :         if (ret)
    3361                 :            :                 goto out_unlock;
    3362                 :            : 
    3363                 :          0 :         kernfs_activate(cgrp->kn);
    3364                 :            : out_unlock:
    3365                 :          0 :         cgroup_kn_unlock(of->kn);
    3366                 :          0 :         return ret ?: nbytes;
    3367                 :            : }
    3368                 :            : 
    3369                 :            : /**
    3370                 :            :  * cgroup_enable_threaded - make @cgrp threaded
    3371                 :            :  * @cgrp: the target cgroup
    3372                 :            :  *
    3373                 :            :  * Called when "threaded" is written to the cgroup.type interface file and
    3374                 :            :  * tries to make @cgrp threaded and join the parent's resource domain.
    3375                 :            :  * This function is never called on the root cgroup as cgroup.type doesn't
    3376                 :            :  * exist on it.
                         :            :  *
                         :            :  * Must be called with cgroup_mutex held.
                         :            :  *
                         :            :  * Returns 0 on success or if @cgrp is already threaded.  Returns
                         :            :  * -EOPNOTSUPP if @cgrp is populated, has controllers outside
                         :            :  * cgrp_dfl_threaded_ss_mask in ->subtree_control, or if the parent's
                         :            :  * ->dom_cgrp is not a valid domain or can't be a thread root.
                         :            :  * Otherwise returns the result of cgroup_apply_control().
    3377                 :            :  */
    3378                 :          0 : static int cgroup_enable_threaded(struct cgroup *cgrp)
    3379                 :            : {
    3380                 :            :         struct cgroup *parent = cgroup_parent(cgrp);
    3381                 :          0 :         struct cgroup *dom_cgrp = parent->dom_cgrp;
    3382                 :            :         struct cgroup *dsct;
    3383                 :            :         struct cgroup_subsys_state *d_css;
    3384                 :            :         int ret;
    3385                 :            : 
    3386                 :            :         lockdep_assert_held(&cgroup_mutex);
    3387                 :            : 
    3388                 :            :         /* noop if already threaded */
    3389                 :          0 :         if (cgroup_is_threaded(cgrp))
    3390                 :            :                 return 0;
    3391                 :            : 
    3392                 :            :         /*
    3393                 :            :          * If @cgrp is populated or has domain controllers enabled, it
    3394                 :            :          * can't be switched.  While the below cgroup_can_be_thread_root()
    3395                 :            :          * test can catch the same conditions, that's only when @parent is
    3396                 :            :          * not mixable, so let's check it explicitly.
    3397                 :            :          */
    3398                 :          0 :         if (cgroup_is_populated(cgrp) ||
    3399                 :          0 :             cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
    3400                 :            :                 return -EOPNOTSUPP;
    3401                 :            : 
    3402                 :            :         /* we're joining the parent's domain, ensure its validity */
    3403                 :          0 :         if (!cgroup_is_valid_domain(dom_cgrp) ||
    3404                 :          0 :             !cgroup_can_be_thread_root(dom_cgrp))
    3405                 :            :                 return -EOPNOTSUPP;
    3406                 :            : 
    3407                 :            :         /*
    3408                 :            :          * The following shouldn't cause actual migrations and should
    3409                 :            :          * always succeed.
    3410                 :            :          */
    3411                 :          0 :         cgroup_save_control(cgrp);
    3412                 :            : 
    3413                 :          0 :         cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
    3414                 :          0 :                 if (dsct == cgrp || cgroup_is_threaded(dsct))
    3415                 :          0 :                         dsct->dom_cgrp = dom_cgrp;
    3416                 :            : 
    3417                 :          0 :         ret = cgroup_apply_control(cgrp);
    3418                 :          0 :         if (!ret)
    3419                 :          0 :                 parent->nr_threaded_children++; /* counted only on success */
    3420                 :            : 
    3421                 :          0 :         cgroup_finalize_control(cgrp, ret);
    3422                 :          0 :         return ret;
    3423                 :            : }
    3424                 :            : /**
                         :            :  * cgroup_type_show - print the type of a cgroup
                         :            :  * @seq: seq_file to print to; the cgroup is taken from seq_css(@seq)
                         :            :  * @v: unused
                         :            :  *
                         :            :  * Emits exactly one line, picked by the first test that matches:
                         :            :  * "threaded" if the cgroup is threaded, "domain invalid" if it is not
                         :            :  * a valid domain, "domain threaded" if it is a thread root, and
                         :            :  * "domain" otherwise.  Always returns 0.
                         :            :  */
    3425                 :          0 : static int cgroup_type_show(struct seq_file *seq, void *v)
    3426                 :            : {
    3427                 :          0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    3428                 :            : 
    3429                 :          0 :         if (cgroup_is_threaded(cgrp))
    3430                 :          0 :                 seq_puts(seq, "threaded\n");
    3431                 :          0 :         else if (!cgroup_is_valid_domain(cgrp))
    3432                 :          0 :                 seq_puts(seq, "domain invalid\n");
    3433                 :          0 :         else if (cgroup_is_thread_root(cgrp))
    3434                 :          0 :                 seq_puts(seq, "domain threaded\n");
    3435                 :            :         else
    3436                 :          0 :                 seq_puts(seq, "domain\n");
    3437                 :            : 
    3438                 :          0 :         return 0;
    3439                 :            : }
    3440                 :            : /**
                         :            :  * cgroup_type_write - handle a write to the cgroup type file
                         :            :  * @of: open file being written; the cgroup is looked up from @of->kn
                         :            :  * @buf: user input, stripped of surrounding whitespace before use
                         :            :  * @nbytes: length of the write, returned on success
                         :            :  * @off: unused
                         :            :  *
                         :            :  * The only accepted input is "threaded", which is forwarded to
                         :            :  * cgroup_enable_threaded() with the cgroup locked through
                         :            :  * cgroup_kn_lock_live().
                         :            :  *
                         :            :  * Returns @nbytes on success, -EINVAL for any other input, -ENOENT if
                         :            :  * cgroup_kn_lock_live() fails, or the error returned by
                         :            :  * cgroup_enable_threaded().
                         :            :  */
    3441                 :          0 : static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
    3442                 :            :                                  size_t nbytes, loff_t off)
    3443                 :            : {
    3444                 :            :         struct cgroup *cgrp;
    3445                 :            :         int ret;
    3446                 :            : 
    3447                 :            :         /* only switching to threaded mode is supported */
    3448                 :          0 :         if (strcmp(strstrip(buf), "threaded"))
    3449                 :            :                 return -EINVAL;
    3450                 :            : 
    3451                 :            :         /* drain dying csses before we re-apply (threaded) subtree control */
    3452                 :          0 :         cgrp = cgroup_kn_lock_live(of->kn, true);
    3453                 :          0 :         if (!cgrp)
    3454                 :            :                 return -ENOENT;
    3455                 :            : 
    3456                 :            :         /* threaded can only be enabled */
    3457                 :          0 :         ret = cgroup_enable_threaded(cgrp);
    3458                 :            : 
    3459                 :          0 :         cgroup_kn_unlock(of->kn);
    3460                 :          0 :         return ret ?: nbytes;
    3461                 :            : }
    3462                 :            : 
    3463                 :          0 : static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
    3464                 :            : {
    3465                 :          0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    3466                 :          0 :         int descendants = READ_ONCE(cgrp->max_descendants);
    3467                 :            : 
    3468                 :          0 :         if (descendants == INT_MAX)
    3469                 :          0 :                 seq_puts(seq, "max\n");
    3470                 :            :         else
    3471                 :          0 :                 seq_printf(seq, "%d\n", descendants);
    3472                 :            : 
    3473                 :          0 :         return 0;
    3474                 :            : }
    3475                 :            : 
    3476                 :          0 : static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
    3477                 :            :                                            char *buf, size_t nbytes, loff_t off)
    3478                 :            : {
    3479                 :            :         struct cgroup *cgrp;
    3480                 :            :         int descendants;
    3481                 :            :         ssize_t ret;
    3482                 :            : 
    3483                 :            :         buf = strstrip(buf);
    3484                 :          0 :         if (!strcmp(buf, "max")) {
    3485                 :          0 :                 descendants = INT_MAX;
    3486                 :            :         } else {
    3487                 :          0 :                 ret = kstrtoint(buf, 0, &descendants);
    3488                 :          0 :                 if (ret)
    3489                 :            :                         return ret;
    3490                 :            :         }
    3491                 :            : 
    3492                 :          0 :         if (descendants < 0)
    3493                 :            :                 return -ERANGE;
    3494                 :            : 
    3495                 :          0 :         cgrp = cgroup_kn_lock_live(of->kn, false);
    3496                 :          0 :         if (!cgrp)
    3497                 :            :                 return -ENOENT;
    3498                 :            : 
    3499                 :          0 :         cgrp->max_descendants = descendants;
    3500                 :            : 
    3501                 :          0 :         cgroup_kn_unlock(of->kn);
    3502                 :            : 
    3503                 :          0 :         return nbytes;
    3504                 :            : }
    3505                 :            : 
    3506                 :          0 : static int cgroup_max_depth_show(struct seq_file *seq, void *v)
    3507                 :            : {
    3508                 :          0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    3509                 :          0 :         int depth = READ_ONCE(cgrp->max_depth);
    3510                 :            : 
    3511                 :          0 :         if (depth == INT_MAX)
    3512                 :          0 :                 seq_puts(seq, "max\n");
    3513                 :            :         else
    3514                 :          0 :                 seq_printf(seq, "%d\n", depth);
    3515                 :            : 
    3516                 :          0 :         return 0;
    3517                 :            : }
    3518                 :            : 
    3519                 :          0 : static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
    3520                 :            :                                       char *buf, size_t nbytes, loff_t off)
    3521                 :            : {
    3522                 :            :         struct cgroup *cgrp;
    3523                 :            :         ssize_t ret;
    3524                 :            :         int depth;
    3525                 :            : 
    3526                 :            :         buf = strstrip(buf);
    3527                 :          0 :         if (!strcmp(buf, "max")) {
    3528                 :          0 :                 depth = INT_MAX;
    3529                 :            :         } else {
    3530                 :          0 :                 ret = kstrtoint(buf, 0, &depth);
    3531                 :          0 :                 if (ret)
    3532                 :            :                         return ret;
    3533                 :            :         }
    3534                 :            : 
    3535                 :          0 :         if (depth < 0)
    3536                 :            :                 return -ERANGE;
    3537                 :            : 
    3538                 :          0 :         cgrp = cgroup_kn_lock_live(of->kn, false);
    3539                 :          0 :         if (!cgrp)
    3540                 :            :                 return -ENOENT;
    3541                 :            : 
    3542                 :          0 :         cgrp->max_depth = depth;
    3543                 :            : 
    3544                 :          0 :         cgroup_kn_unlock(of->kn);
    3545                 :            : 
    3546                 :          0 :         return nbytes;
    3547                 :            : }
    3548                 :            : 
    3549                 :          3 : static int cgroup_events_show(struct seq_file *seq, void *v)
    3550                 :            : {
    3551                 :          3 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    3552                 :            : 
    3553                 :          3 :         seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
    3554                 :          3 :         seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
    3555                 :            : 
    3556                 :          3 :         return 0;
    3557                 :            : }
    3558                 :            : 
    3559                 :          0 : static int cgroup_stat_show(struct seq_file *seq, void *v)
    3560                 :            : {
    3561                 :          0 :         struct cgroup *cgroup = seq_css(seq)->cgroup;
    3562                 :            : 
    3563                 :          0 :         seq_printf(seq, "nr_descendants %d\n",
    3564                 :            :                    cgroup->nr_descendants);
    3565                 :          0 :         seq_printf(seq, "nr_dying_descendants %d\n",
    3566                 :            :                    cgroup->nr_dying_descendants);
    3567                 :            : 
    3568                 :          0 :         return 0;
    3569                 :            : }
    3570                 :            : 
    3571                 :          0 : static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
    3572                 :            :                                                  struct cgroup *cgrp, int ssid)
    3573                 :            : {
    3574                 :          0 :         struct cgroup_subsys *ss = cgroup_subsys[ssid];
    3575                 :            :         struct cgroup_subsys_state *css;
    3576                 :            :         int ret;
    3577                 :            : 
    3578                 :          0 :         if (!ss->css_extra_stat_show)
    3579                 :            :                 return 0;
    3580                 :            : 
    3581                 :          0 :         css = cgroup_tryget_css(cgrp, ss);
    3582                 :          0 :         if (!css)
    3583                 :            :                 return 0;
    3584                 :            : 
    3585                 :          0 :         ret = ss->css_extra_stat_show(seq, css);
    3586                 :            :         css_put(css);
    3587                 :          0 :         return ret;
    3588                 :            : }
    3589                 :            : 
    3590                 :          0 : static int cpu_stat_show(struct seq_file *seq, void *v)
    3591                 :            : {
    3592                 :          0 :         struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
    3593                 :            :         int ret = 0;
    3594                 :            : 
    3595                 :          0 :         cgroup_base_stat_cputime_show(seq);
    3596                 :            : #ifdef CONFIG_CGROUP_SCHED
    3597                 :          0 :         ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
    3598                 :            : #endif
    3599                 :          0 :         return ret;
    3600                 :            : }
    3601                 :            : 
    3602                 :            : #ifdef CONFIG_PSI
    3603                 :            : static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
    3604                 :            : {
    3605                 :            :         struct cgroup *cgroup = seq_css(seq)->cgroup;
    3606                 :            :         struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
    3607                 :            : 
    3608                 :            :         return psi_show(seq, psi, PSI_IO);
    3609                 :            : }
    3610                 :            : static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
    3611                 :            : {
    3612                 :            :         struct cgroup *cgroup = seq_css(seq)->cgroup;
    3613                 :            :         struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
    3614                 :            : 
    3615                 :            :         return psi_show(seq, psi, PSI_MEM);
    3616                 :            : }
    3617                 :            : static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
    3618                 :            : {
    3619                 :            :         struct cgroup *cgroup = seq_css(seq)->cgroup;
    3620                 :            :         struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
    3621                 :            : 
    3622                 :            :         return psi_show(seq, psi, PSI_CPU);
    3623                 :            : }
    3624                 :            : 
    3625                 :            : static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
    3626                 :            :                                           size_t nbytes, enum psi_res res)
    3627                 :            : {
    3628                 :            :         struct psi_trigger *new;
    3629                 :            :         struct cgroup *cgrp;
    3630                 :            : 
    3631                 :            :         cgrp = cgroup_kn_lock_live(of->kn, false);
    3632                 :            :         if (!cgrp)
    3633                 :            :                 return -ENODEV;
    3634                 :            : 
    3635                 :            :         cgroup_get(cgrp);
    3636                 :            :         cgroup_kn_unlock(of->kn);
    3637                 :            : 
    3638                 :            :         new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
    3639                 :            :         if (IS_ERR(new)) {
    3640                 :            :                 cgroup_put(cgrp);
    3641                 :            :                 return PTR_ERR(new);
    3642                 :            :         }
    3643                 :            : 
    3644                 :            :         psi_trigger_replace(&of->priv, new);
    3645                 :            : 
    3646                 :            :         cgroup_put(cgrp);
    3647                 :            : 
    3648                 :            :         return nbytes;
    3649                 :            : }
    3650                 :            : 
    3651                 :            : static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
    3652                 :            :                                           char *buf, size_t nbytes,
    3653                 :            :                                           loff_t off)
    3654                 :            : {
    3655                 :            :         return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
    3656                 :            : }
    3657                 :            : 
    3658                 :            : static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
    3659                 :            :                                           char *buf, size_t nbytes,
    3660                 :            :                                           loff_t off)
    3661                 :            : {
    3662                 :            :         return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
    3663                 :            : }
    3664                 :            : 
    3665                 :            : static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
    3666                 :            :                                           char *buf, size_t nbytes,
    3667                 :            :                                           loff_t off)
    3668                 :            : {
    3669                 :            :         return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
    3670                 :            : }
    3671                 :            : 
    3672                 :            : static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
    3673                 :            :                                           poll_table *pt)
    3674                 :            : {
    3675                 :            :         return psi_trigger_poll(&of->priv, of->file, pt);
    3676                 :            : }
    3677                 :            : 
    3678                 :            : static void cgroup_pressure_release(struct kernfs_open_file *of)
    3679                 :            : {
    3680                 :            :         psi_trigger_replace(&of->priv, NULL);
    3681                 :            : }
    3682                 :            : #endif /* CONFIG_PSI */
    3683                 :            : 
    3684                 :          0 : static int cgroup_freeze_show(struct seq_file *seq, void *v)
    3685                 :            : {
    3686                 :          0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
    3687                 :            : 
    3688                 :          0 :         seq_printf(seq, "%d\n", cgrp->freezer.freeze);
    3689                 :            : 
    3690                 :          0 :         return 0;
    3691                 :            : }
    3692                 :            : 
    3693                 :          0 : static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
    3694                 :            :                                    char *buf, size_t nbytes, loff_t off)
    3695                 :            : {
    3696                 :            :         struct cgroup *cgrp;
    3697                 :            :         ssize_t ret;
    3698                 :            :         int freeze;
    3699                 :            : 
    3700                 :          0 :         ret = kstrtoint(strstrip(buf), 0, &freeze);
    3701                 :          0 :         if (ret)
    3702                 :            :                 return ret;
    3703                 :            : 
    3704                 :          0 :         if (freeze < 0 || freeze > 1)
    3705                 :            :                 return -ERANGE;
    3706                 :            : 
    3707                 :          0 :         cgrp = cgroup_kn_lock_live(of->kn, false);
    3708                 :          0 :         if (!cgrp)
    3709                 :            :                 return -ENOENT;
    3710                 :            : 
    3711                 :          0 :         cgroup_freeze(cgrp, freeze);
    3712                 :            : 
    3713                 :          0 :         cgroup_kn_unlock(of->kn);
    3714                 :            : 
    3715                 :          0 :         return nbytes;
    3716                 :            : }
    3717                 :            : 
    3718                 :          3 : static int cgroup_file_open(struct kernfs_open_file *of)
    3719                 :            : {
    3720                 :          3 :         struct cftype *cft = of->kn->priv;
    3721                 :            : 
    3722                 :          3 :         if (cft->open)
    3723                 :          0 :                 return cft->open(of);
    3724                 :            :         return 0;
    3725                 :            : }
    3726                 :            : 
    3727                 :          3 : static void cgroup_file_release(struct kernfs_open_file *of)
    3728                 :            : {
    3729                 :          3 :         struct cftype *cft = of->kn->priv;
    3730                 :            : 
    3731                 :          3 :         if (cft->release)
    3732                 :          3 :                 cft->release(of);
    3733                 :          3 : }
    3734                 :            : 
    3735                 :          3 : static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
    3736                 :            :                                  size_t nbytes, loff_t off)
    3737                 :            : {
    3738                 :          3 :         struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
    3739                 :          3 :         struct cgroup *cgrp = of->kn->parent->priv;
    3740                 :          3 :         struct cftype *cft = of->kn->priv;
    3741                 :            :         struct cgroup_subsys_state *css;
    3742                 :            :         int ret;
    3743                 :            : 
    3744                 :            :         /*
    3745                 :            :          * If namespaces are delegation boundaries, disallow writes to
    3746                 :            :          * files in an non-init namespace root from inside the namespace
    3747                 :            :          * except for the files explicitly marked delegatable -
    3748                 :            :          * cgroup.procs and cgroup.subtree_control.
    3749                 :            :          */
    3750                 :          3 :         if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
    3751                 :          3 :             !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
    3752                 :          0 :             ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
    3753                 :            :                 return -EPERM;
    3754                 :            : 
    3755                 :          3 :         if (cft->write)
    3756                 :          3 :                 return cft->write(of, buf, nbytes, off);
    3757                 :            : 
    3758                 :            :         /*
    3759                 :            :          * kernfs guarantees that a file isn't deleted with operations in
    3760                 :            :          * flight, which means that the matching css is and stays alive and
    3761                 :            :          * doesn't need to be pinned.  The RCU locking is not necessary
    3762                 :            :          * either.  It's just for the convenience of using cgroup_css().
    3763                 :            :          */
    3764                 :            :         rcu_read_lock();
    3765                 :          0 :         css = cgroup_css(cgrp, cft->ss);
    3766                 :            :         rcu_read_unlock();
    3767                 :            : 
    3768                 :          0 :         if (cft->write_u64) {
    3769                 :            :                 unsigned long long v;
    3770                 :          0 :                 ret = kstrtoull(buf, 0, &v);
    3771                 :          0 :                 if (!ret)
    3772                 :          0 :                         ret = cft->write_u64(css, cft, v);
    3773                 :          0 :         } else if (cft->write_s64) {
    3774                 :            :                 long long v;
    3775                 :          0 :                 ret = kstrtoll(buf, 0, &v);
    3776                 :          0 :                 if (!ret)
    3777                 :          0 :                         ret = cft->write_s64(css, cft, v);
    3778                 :            :         } else {
    3779                 :            :                 ret = -EINVAL;
    3780                 :            :         }
    3781                 :            : 
    3782                 :          0 :         return ret ?: nbytes;
    3783                 :            : }
    3784                 :            : 
    3785                 :          0 : static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
    3786                 :            : {
    3787                 :          0 :         struct cftype *cft = of->kn->priv;
    3788                 :            : 
    3789                 :          0 :         if (cft->poll)
    3790                 :          0 :                 return cft->poll(of, pt);
    3791                 :            : 
    3792                 :          0 :         return kernfs_generic_poll(of, pt);
    3793                 :            : }
    3794                 :            : 
    3795                 :          3 : static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
    3796                 :            : {
    3797                 :          3 :         return seq_cft(seq)->seq_start(seq, ppos);
    3798                 :            : }
    3799                 :            : 
    3800                 :          3 : static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
    3801                 :            : {
    3802                 :          3 :         return seq_cft(seq)->seq_next(seq, v, ppos);
    3803                 :            : }
    3804                 :            : 
    3805                 :          3 : static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
    3806                 :            : {
    3807                 :          3 :         if (seq_cft(seq)->seq_stop)
    3808                 :          0 :                 seq_cft(seq)->seq_stop(seq, v);
    3809                 :          3 : }
    3810                 :            : 
    3811                 :          3 : static int cgroup_seqfile_show(struct seq_file *m, void *arg)
    3812                 :            : {
    3813                 :            :         struct cftype *cft = seq_cft(m);
    3814                 :            :         struct cgroup_subsys_state *css = seq_css(m);
    3815                 :            : 
    3816                 :          3 :         if (cft->seq_show)
    3817                 :          3 :                 return cft->seq_show(m, arg);
    3818                 :            : 
    3819                 :          3 :         if (cft->read_u64)
    3820                 :          0 :                 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
    3821                 :          3 :         else if (cft->read_s64)
    3822                 :          3 :                 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
    3823                 :            :         else
    3824                 :            :                 return -EINVAL;
    3825                 :            :         return 0;
    3826                 :            : }
    3827                 :            : 
    3828                 :            : static struct kernfs_ops cgroup_kf_single_ops = {
    3829                 :            :         .atomic_write_len       = PAGE_SIZE,
    3830                 :            :         .open                   = cgroup_file_open,
    3831                 :            :         .release                = cgroup_file_release,
    3832                 :            :         .write                  = cgroup_file_write,
    3833                 :            :         .poll                   = cgroup_file_poll,
    3834                 :            :         .seq_show               = cgroup_seqfile_show,
    3835                 :            : };
    3836                 :            : 
    3837                 :            : static struct kernfs_ops cgroup_kf_ops = {
    3838                 :            :         .atomic_write_len       = PAGE_SIZE,
    3839                 :            :         .open                   = cgroup_file_open,
    3840                 :            :         .release                = cgroup_file_release,
    3841                 :            :         .write                  = cgroup_file_write,
    3842                 :            :         .poll                   = cgroup_file_poll,
    3843                 :            :         .seq_start              = cgroup_seqfile_start,
    3844                 :            :         .seq_next               = cgroup_seqfile_next,
    3845                 :            :         .seq_stop               = cgroup_seqfile_stop,
    3846                 :            :         .seq_show               = cgroup_seqfile_show,
    3847                 :            : };
    3848                 :            : 
    3849                 :            : /* set uid and gid of cgroup dirs and files to that of the creator */
    3850                 :          3 : static int cgroup_kn_set_ugid(struct kernfs_node *kn)
    3851                 :            : {
    3852                 :          3 :         struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
    3853                 :          3 :                                .ia_uid = current_fsuid(),
    3854                 :            :                                .ia_gid = current_fsgid(), };
    3855                 :            : 
    3856                 :          3 :         if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
    3857                 :            :             gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
    3858                 :            :                 return 0;
    3859                 :            : 
    3860                 :          3 :         return kernfs_setattr(kn, &iattr);
    3861                 :            : }
    3862                 :            : 
    3863                 :          3 : static void cgroup_file_notify_timer(struct timer_list *timer)
    3864                 :            : {
    3865                 :          3 :         cgroup_file_notify(container_of(timer, struct cgroup_file,
    3866                 :            :                                         notify_timer));
    3867                 :          3 : }
    3868                 :            : 
    3869                 :          3 : static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
    3870                 :            :                            struct cftype *cft)
    3871                 :            : {
    3872                 :            :         char name[CGROUP_FILE_NAME_MAX];
    3873                 :            :         struct kernfs_node *kn;
    3874                 :            :         struct lock_class_key *key = NULL;
    3875                 :            :         int ret;
    3876                 :            : 
    3877                 :            : #ifdef CONFIG_DEBUG_LOCK_ALLOC
    3878                 :            :         key = &cft->lockdep_key;
    3879                 :            : #endif
    3880                 :          3 :         kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
    3881                 :            :                                   cgroup_file_mode(cft),
    3882                 :          3 :                                   GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
    3883                 :          3 :                                   0, cft->kf_ops, cft,
    3884                 :            :                                   NULL, key);
    3885                 :          3 :         if (IS_ERR(kn))
    3886                 :          0 :                 return PTR_ERR(kn);
    3887                 :            : 
    3888                 :          3 :         ret = cgroup_kn_set_ugid(kn);
    3889                 :          3 :         if (ret) {
    3890                 :          0 :                 kernfs_remove(kn);
    3891                 :          0 :                 return ret;
    3892                 :            :         }
    3893                 :            : 
    3894                 :          3 :         if (cft->file_offset) {
    3895                 :          3 :                 struct cgroup_file *cfile = (void *)css + cft->file_offset;
    3896                 :            : 
    3897                 :          3 :                 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
    3898                 :            : 
    3899                 :            :                 spin_lock_irq(&cgroup_file_kn_lock);
    3900                 :          3 :                 cfile->kn = kn;
    3901                 :            :                 spin_unlock_irq(&cgroup_file_kn_lock);
    3902                 :            :         }
    3903                 :            : 
    3904                 :            :         return 0;
    3905                 :            : }
    3906                 :            : 
    3907                 :            : /**
    3908                 :            :  * cgroup_addrm_files - add or remove files to a cgroup directory
    3909                 :            :  * @css: the target css
    3910                 :            :  * @cgrp: the target cgroup (usually css->cgroup)
    3911                 :            :  * @cfts: array of cftypes to be added or removed
    3912                 :            :  * @is_add: whether to add or remove
    3913                 :            :  *
    3914                 :            :  * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
    3915                 :            :  * For removals, this function never fails.  A failed add is rolled back.
    3916                 :            :  */
    3917                 :          3 : static int cgroup_addrm_files(struct cgroup_subsys_state *css,
    3918                 :            :                               struct cgroup *cgrp, struct cftype cfts[],
    3919                 :            :                               bool is_add)
    3920                 :            : {
    3921                 :            :         struct cftype *cft, *cft_end = NULL;
    3922                 :            :         int ret = 0;
    3923                 :            : 
    3924                 :            :         lockdep_assert_held(&cgroup_mutex);
    3925                 :            : 
    3926                 :            : restart:
    3927                 :          3 :         for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
    3928                 :            :                 /* does cft->flags tell us to skip this file on @cgrp? */
    3929                 :          3 :                 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
    3930                 :          3 :                         continue;
    3931                 :          3 :                 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
    3932                 :          3 :                         continue;
    3933                 :          3 :                 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
    3934                 :          3 :                         continue;
    3935                 :          3 :                 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
    3936                 :          3 :                         continue;
    3937                 :          3 :                 if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
    3938                 :          3 :                         continue;
    3939                 :          3 :                 if (is_add) {
    3940                 :          3 :                         ret = cgroup_add_file(css, cgrp, cft);
    3941                 :          3 :                         if (ret) {
    3942                 :          0 :                                 pr_warn("%s: failed to add %s, err=%d\n",
    3943                 :            :                                         __func__, cft->name, ret);
    3944                 :          0 :                                 cft_end = cft; /* second pass stops before the failed entry */
    3945                 :            :                                 is_add = false; /* rerun the loop as a removal to undo the adds */
    3946                 :          0 :                                 goto restart;
    3947                 :            :                         }
    3948                 :            :                 } else {
    3949                 :          3 :                         cgroup_rm_file(cgrp, cft);
    3950                 :            :                 }
    3951                 :            :         }
    3952                 :          3 :         return ret; /* 0, or the cgroup_add_file() error that triggered the roll-back */
    3953                 :            : }
    3954                 :            : /* cgroup_apply_cftypes - add or remove @cfts files on every CSS_VISIBLE css under the root of cfts[0].ss */
    3955                 :          3 : static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
    3956                 :            : {
    3957                 :          3 :         struct cgroup_subsys *ss = cfts[0].ss;
    3958                 :          3 :         struct cgroup *root = &ss->root->cgrp;
    3959                 :            :         struct cgroup_subsys_state *css;
    3960                 :            :         int ret = 0;
    3961                 :            : 
    3962                 :            :         lockdep_assert_held(&cgroup_mutex);
    3963                 :            : 
    3964                 :            :         /* add/rm files for all cgroups created before */
    3965                 :          3 :         css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
    3966                 :          3 :                 struct cgroup *cgrp = css->cgroup;
    3967                 :            : 
    3968                 :          3 :                 if (!(css->flags & CSS_VISIBLE))
    3969                 :          3 :                         continue; /* only csses flagged CSS_VISIBLE get files */
    3970                 :            : 
    3971                 :          0 :                 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
    3972                 :          0 :                 if (ret)
    3973                 :            :                         break; /* stop at the first failure; the error is returned to the caller */
    3974                 :            :         }
    3975                 :            : 
    3976                 :          3 :         if (is_add && !ret)
    3977                 :          3 :                 kernfs_activate(root->kn); /* NOTE(review): presumably exposes the newly added files - confirm */
    3978                 :          3 :         return ret;
    3979                 :            : }
    3980                 :            : /* cgroup_exit_cftypes - undo cgroup_init_cftypes(): free copied kf_ops, clear ->kf_ops, ->ss and core-set flags */
    3981                 :          0 : static void cgroup_exit_cftypes(struct cftype *cfts)
    3982                 :            : {
    3983                 :            :         struct cftype *cft;
    3984                 :            : 
    3985                 :          0 :         for (cft = cfts; cft->name[0] != '\0'; cft++) {
    3986                 :            :                 /* free copy for custom atomic_write_len, see cgroup_init_cftypes() */
    3987                 :          0 :                 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
    3988                 :          0 :                         kfree(cft->kf_ops);
    3989                 :          0 :                 cft->kf_ops = NULL;
    3990                 :          0 :                 cft->ss = NULL;
    3991                 :            : 
    3992                 :            :                 /* revert flags set by cgroup core while adding @cfts */
    3993                 :          0 :                 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
    3994                 :            :         }
    3995                 :          0 : }
    3996                 :            : /* cgroup_init_cftypes - bind every cftype in @cfts to @ss and choose its kernfs_ops; returns 0 or -ENOMEM */
    3997                 :          3 : static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
    3998                 :            : {
    3999                 :            :         struct cftype *cft;
    4000                 :            : 
    4001                 :          3 :         for (cft = cfts; cft->name[0] != '\0'; cft++) {
    4002                 :            :                 struct kernfs_ops *kf_ops;
    4003                 :            : 
    4004                 :          3 :                 WARN_ON(cft->ss || cft->kf_ops); /* warn if @cft already looks initialized */
    4005                 :            : 
    4006                 :          3 :                 if (cft->seq_start)
    4007                 :            :                         kf_ops = &cgroup_kf_ops;
    4008                 :            :                 else
    4009                 :            :                         kf_ops = &cgroup_kf_single_ops;
    4010                 :            : 
    4011                 :            :                 /*
    4012                 :            :                  * Ugh... if @cft wants a custom max_write_len, we need to
    4013                 :            :                  * make a copy of kf_ops to set its atomic_write_len.
    4014                 :            :                  */
    4015                 :          3 :                 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
    4016                 :          3 :                         kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
    4017                 :          3 :                         if (!kf_ops) {
    4018                 :          0 :                                 cgroup_exit_cftypes(cfts); /* unwind: walks the whole array, including entries not yet set up */
    4019                 :          0 :                                 return -ENOMEM;
    4020                 :            :                         }
    4021                 :          3 :                         kf_ops->atomic_write_len = cft->max_write_len;
    4022                 :            :                 }
    4023                 :            : 
    4024                 :          3 :                 cft->kf_ops = kf_ops;
    4025                 :          3 :                 cft->ss = ss;
    4026                 :            :         }
    4027                 :            : 
    4028                 :            :         return 0;
    4029                 :            : }
    4030                 :            : /* cgroup_rm_cftypes_locked - unregister @cfts with cgroup_mutex held; -ENOENT if @cfts is NULL or has no ->ss */
    4031                 :          0 : static int cgroup_rm_cftypes_locked(struct cftype *cfts)
    4032                 :            : {
    4033                 :            :         lockdep_assert_held(&cgroup_mutex);
    4034                 :            : 
    4035                 :          0 :         if (!cfts || !cfts[0].ss)
    4036                 :            :                 return -ENOENT;
    4037                 :            : 
    4038                 :            :         list_del(&cfts->node); /* unlink from the list it was put on by cgroup_add_cftypes() */
    4039                 :          0 :         cgroup_apply_cftypes(cfts, false); /* removal pass; return value deliberately ignored */
    4040                 :          0 :         cgroup_exit_cftypes(cfts);
    4041                 :          0 :         return 0;
    4042                 :            : }
    4043                 :            : 
    4044                 :            : /**
    4045                 :            :  * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
    4046                 :            :  * @cfts: zero-length name terminated array of cftypes
    4047                 :            :  *
    4048                 :            :  * Unregister @cfts.  Files described by @cfts are removed from all
    4049                 :            :  * existing cgroups and all future cgroups won't have them either.  This
    4050                 :            :  * function can be called anytime whether @cfts' subsys is attached or not.
    4051                 :            :  *
    4052                 :            :  * Returns 0 on successful unregistration, -ENOENT if @cfts is not
    4053                 :            :  * registered.
    4054                 :            :  */
    4055                 :          0 : int cgroup_rm_cftypes(struct cftype *cfts)
    4056                 :            : {
    4057                 :            :         int ret;
    4058                 :            : 
    4059                 :          0 :         mutex_lock(&cgroup_mutex); /* cgroup_rm_cftypes_locked() asserts that this is held */
    4060                 :          0 :         ret = cgroup_rm_cftypes_locked(cfts);
    4061                 :          0 :         mutex_unlock(&cgroup_mutex);
    4062                 :          0 :         return ret;
    4063                 :            : }
    4064                 :            : 
    4065                 :            : /**
    4066                 :            :  * cgroup_add_cftypes - add an array of cftypes to a subsystem
    4067                 :            :  * @ss: target cgroup subsystem
    4068                 :            :  * @cfts: zero-length name terminated array of cftypes
    4069                 :            :  *
    4070                 :            :  * Register @cfts to @ss.  Files described by @cfts are created for all
    4071                 :            :  * existing cgroups to which @ss is attached and all future cgroups will
    4072                 :            :  * have them too.  This function can be called anytime whether @ss is
    4073                 :            :  * attached or not.
    4074                 :            :  *
    4075                 :            :  * Returns 0 on successful registration, -errno on failure.  If creating
    4076                 :            :  * the files on an existing cgroup fails, @cfts is unregistered again and
    4077                 :            :  * that error is returned.  A disabled @ss or an empty @cfts returns 0.
    4078                 :            :  */
    4079                 :          3 : static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
    4080                 :            : {
    4081                 :            :         int ret;
    4082                 :            : 
    4083                 :          3 :         if (!cgroup_ssid_enabled(ss->id))
    4084                 :            :                 return 0;
    4085                 :            : 
    4086                 :          3 :         if (!cfts || cfts[0].name[0] == '\0')
    4087                 :            :                 return 0;
    4088                 :            : 
    4089                 :          3 :         ret = cgroup_init_cftypes(ss, cfts);
    4090                 :          3 :         if (ret)
    4091                 :            :                 return ret;
    4092                 :            : 
    4093                 :          3 :         mutex_lock(&cgroup_mutex);
    4094                 :            : 
    4095                 :          3 :         list_add_tail(&cfts->node, &ss->cfts);
    4096                 :          3 :         ret = cgroup_apply_cftypes(cfts, true);
    4097                 :          3 :         if (ret)
    4098                 :          0 :                 cgroup_rm_cftypes_locked(cfts); /* roll back: unlink, remove files, undo init */
    4099                 :            : 
    4100                 :          3 :         mutex_unlock(&cgroup_mutex);
    4101                 :          3 :         return ret;
    4102                 :            : }
    4103                 :            : 
    4104                 :            : /**
    4105                 :            :  * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
    4106                 :            :  * @ss: target cgroup subsystem
    4107                 :            :  * @cfts: zero-length name terminated array of cftypes
    4108                 :            :  *
    4109                 :            :  * Similar to cgroup_add_cftypes() but the added files are only used for
    4110                 :            :  * the default hierarchy.
    4111                 :            :  */
    4112                 :          3 : int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
    4113                 :            : {
    4114                 :            :         struct cftype *cft;
    4115                 :            : 
    4116                 :          3 :         for (cft = cfts; cft && cft->name[0] != '\0'; cft++) /* a NULL @cfts skips the loop */
    4117                 :          3 :                 cft->flags |= __CFTYPE_ONLY_ON_DFL; /* cgroup_addrm_files() skips these when !cgroup_on_dfl() */
    4118                 :          3 :         return cgroup_add_cftypes(ss, cfts);
    4119                 :            : }
    4120                 :            : 
    4121                 :            : /**
    4122                 :            :  * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
    4123                 :            :  * @ss: target cgroup subsystem
    4124                 :            :  * @cfts: zero-length name terminated array of cftypes
    4125                 :            :  *
    4126                 :            :  * Similar to cgroup_add_cftypes() but the added files are only used for
    4127                 :            :  * the legacy hierarchies.
    4128                 :            :  */
    4129                 :          3 : int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
    4130                 :            : {
    4131                 :            :         struct cftype *cft;
    4132                 :            : 
    4133                 :          3 :         for (cft = cfts; cft && cft->name[0] != '\0'; cft++) /* a NULL @cfts skips the loop */
    4134                 :          3 :                 cft->flags |= __CFTYPE_NOT_ON_DFL; /* cgroup_addrm_files() skips these when cgroup_on_dfl() */
    4135                 :          3 :         return cgroup_add_cftypes(ss, cfts);
    4136                 :            : }
    4137                 :            : 
    4138                 :            : /**
    4139                 :            :  * cgroup_file_notify - generate a file modified event for a cgroup_file
    4140                 :            :  * @cfile: target cgroup_file
    4141                 :            :  *
    4142                 :            :  * @cfile must have been obtained by setting cftype->file_offset.
    4143                 :            :  */
    4144                 :          3 : void cgroup_file_notify(struct cgroup_file *cfile)
    4145                 :            : {
    4146                 :            :         unsigned long flags;
    4147                 :            : 
    4148                 :          3 :         spin_lock_irqsave(&cgroup_file_kn_lock, flags);
    4149                 :          3 :         if (cfile->kn) { /* nothing to notify while no kernfs node is attached */
    4150                 :          3 :                 unsigned long last = cfile->notified_at;
    4151                 :          3 :                 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
    4152                 :            : 
    4153                 :          3 :                 if (time_in_range(jiffies, last, next)) { /* still within the minimum interval */
    4154                 :          3 :                         timer_reduce(&cfile->notify_timer, next); /* NOTE(review): presumably defers the event to notify_timer at @next - confirm */
    4155                 :            :                 } else {
    4156                 :          3 :                         kernfs_notify(cfile->kn);
    4157                 :          3 :                         cfile->notified_at = jiffies; /* start of the next rate-limit window */
    4158                 :            :                 }
    4159                 :            :         }
    4160                 :            :         spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
    4161                 :          3 : }
    4162                 :            : 
    4163                 :            : /**
    4164                 :            :  * css_next_child - find the next child of a given css
    4165                 :            :  * @pos: the current position (%NULL to initiate traversal)
    4166                 :            :  * @parent: css whose children to walk
    4167                 :            :  *
    4168                 :            :  * This function returns the next child of @parent and should be called
    4169                 :            :  * under either cgroup_mutex or RCU read lock.  The only requirement is
    4170                 :            :  * that @parent and @pos are accessible.  The next sibling is guaranteed to
    4171                 :            :  * be returned regardless of their states.
    4172                 :            :  *
    4173                 :            :  * If a subsystem synchronizes ->css_online() and the start of iteration, a
    4174                 :            :  * css which finished ->css_online() is guaranteed to be visible in the
    4175                 :            :  * future iterations and will stay visible until the last reference is put.
    4176                 :            :  * A css which hasn't finished ->css_online() or already finished
    4177                 :            :  * ->css_offline() may show up during traversal.  It's each subsystem's
    4178                 :            :  * responsibility to synchronize against on/offlining.
    4179                 :            :  */
    4180                 :          3 : struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
    4181                 :            :                                            struct cgroup_subsys_state *parent)
    4182                 :            : {
    4183                 :            :         struct cgroup_subsys_state *next;
    4184                 :            : 
    4185                 :            :         cgroup_assert_mutex_or_rcu_locked();
    4186                 :            : 
    4187                 :            :         /*
    4188                 :            :          * @pos could already have been unlinked from the sibling list.
    4189                 :            :          * Once a cgroup is removed, its ->sibling.next is no longer
    4190                 :            :          * updated when its next sibling changes.  CSS_RELEASED is set when
    4191                 :            :          * @pos is taken off list, at which time its next pointer is valid,
    4192                 :            :          * and, as releases are serialized, the one pointed to by the next
    4193                 :            :          * pointer is guaranteed to not have started release yet.  This
    4194                 :            :          * implies that if we observe !CSS_RELEASED on @pos in this RCU
    4195                 :            :          * critical section, the one pointed to by its next pointer is
    4196                 :            :          * guaranteed to not have finished its RCU grace period even if we
    4197                 :            :          * have dropped rcu_read_lock() in between iterations.
    4198                 :            :          *
    4199                 :            :          * If @pos has CSS_RELEASED set, its next pointer can't be
    4200                 :            :          * dereferenced; however, as each css is given a monotonically
    4201                 :            :          * increasing unique serial number and always appended to the
    4202                 :            :          * sibling list, the next one can be found by walking the parent's
    4203                 :            :          * children until the first css with higher serial number than
    4204                 :            :          * @pos's.  While this path can be slower, it happens iff iteration
    4205                 :            :          * races against release and the race window is very small.
    4206                 :            :          */
    4207                 :          3 :         if (!pos) {
    4208                 :          3 :                 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
    4209                 :          0 :         } else if (likely(!(pos->flags & CSS_RELEASED))) {
    4210                 :          0 :                 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
    4211                 :            :         } else {
    4212                 :          0 :                 list_for_each_entry_rcu(next, &parent->children, sibling)
    4213                 :          0 :                         if (next->serial_nr > pos->serial_nr)
    4214                 :            :                                 break;
    4215                 :            :         }
    4216                 :            : 
    4217                 :            :         /*
    4218                 :            :          * @next, if not pointing to the head, can be dereferenced and is
    4219                 :            :          * the next sibling.
    4220                 :            :          */
    4221                 :          3 :         if (&next->sibling != &parent->children)
    4222                 :          0 :                 return next;
    4223                 :            :         return NULL; /* reached the list head: @parent has no further child */
    4224                 :            : }
    4225                 :            : 
    4226                 :            : /**
    4227                 :            :  * css_next_descendant_pre - find the next descendant for pre-order walk
    4228                 :            :  * @pos: the current position (%NULL to initiate traversal)
    4229                 :            :  * @root: css whose descendants to walk
    4230                 :            :  *
    4231                 :            :  * To be used by css_for_each_descendant_pre().  Find the next descendant
    4232                 :            :  * to visit for pre-order traversal of @root's descendants.  @root is
    4233                 :            :  * included in the iteration and the first node to be visited.
    4234                 :            :  *
    4235                 :            :  * While this function requires cgroup_mutex or RCU read locking, it
    4236                 :            :  * doesn't require the whole traversal to be contained in a single critical
    4237                 :            :  * section.  This function will return the correct next descendant as long
    4238                 :            :  * as both @pos and @root are accessible and @pos is a descendant of @root.
    4239                 :            :  *
    4240                 :            :  * If a subsystem synchronizes ->css_online() and the start of iteration, a
    4241                 :            :  * css which finished ->css_online() is guaranteed to be visible in the
    4242                 :            :  * future iterations and will stay visible until the last reference is put.
    4243                 :            :  * A css which hasn't finished ->css_online() or already finished
    4244                 :            :  * ->css_offline() may show up during traversal.  It's each subsystem's
    4245                 :            :  * responsibility to synchronize against on/offlining.
    4246                 :            :  */
    4247                 :            : struct cgroup_subsys_state *
    4248                 :          3 : css_next_descendant_pre(struct cgroup_subsys_state *pos,
    4249                 :            :                         struct cgroup_subsys_state *root)
    4250                 :            : {
    4251                 :            :         struct cgroup_subsys_state *next;
    4252                 :            : 
    4253                 :            :         cgroup_assert_mutex_or_rcu_locked();
    4254                 :            : 
    4255                 :            :         /* if first iteration, visit @root */
    4256                 :          3 :         if (!pos)
    4257                 :            :                 return root;
    4258                 :            : 
    4259                 :            :         /* visit the first child if exists */
    4260                 :          3 :         next = css_next_child(NULL, pos);
    4261                 :          3 :         if (next)
    4262                 :            :                 return next;
    4263                 :            : 
    4264                 :            :         /* no child, visit my or the closest ancestor's next sibling */
    4265                 :          3 :         while (pos != root) {
    4266                 :          0 :                 next = css_next_child(pos, pos->parent);
    4267                 :          0 :                 if (next)
    4268                 :          0 :                         return next;
    4269                 :          0 :                 pos = pos->parent; /* no sibling at this level; climb one level */
    4270                 :            :         }
    4271                 :            : 
    4272                 :            :         return NULL; /* climbed back to @root: traversal is complete */
    4273                 :            : }
    4274                 :            : EXPORT_SYMBOL_GPL(css_next_descendant_pre);
    4275                 :            : 
    4276                 :            : /**
    4277                 :            :  * css_rightmost_descendant - return the rightmost descendant of a css
    4278                 :            :  * @pos: css of interest
    4279                 :            :  *
    4280                 :            :  * Return the rightmost descendant of @pos.  If there's no descendant, @pos
    4281                 :            :  * is returned.  This can be used during pre-order traversal to skip
    4282                 :            :  * subtree of @pos.
    4283                 :            :  *
    4284                 :            :  * While this function requires cgroup_mutex or RCU read locking, it
    4285                 :            :  * doesn't require the whole traversal to be contained in a single critical
    4286                 :            :  * section.  This function will return the correct rightmost descendant as
    4287                 :            :  * long as @pos is accessible.
    4288                 :            :  */
    4289                 :            : struct cgroup_subsys_state *
    4290                 :          0 : css_rightmost_descendant(struct cgroup_subsys_state *pos)
    4291                 :            : {
    4292                 :            :         struct cgroup_subsys_state *last, *tmp;
    4293                 :            : 
    4294                 :            :         cgroup_assert_mutex_or_rcu_locked();
    4295                 :            : 
    4296                 :            :         do {
    4297                 :            :                 last = pos;
    4298                 :            :                 /* ->prev isn't RCU safe, walk ->next till the end */
    4299                 :            :                 pos = NULL;
    4300                 :          0 :                 css_for_each_child(tmp, last)
    4301                 :            :                         pos = tmp; /* ends up as the last child of @last, or stays NULL */
    4302                 :          0 :         } while (pos); /* keep descending until @last has no child */
    4303                 :            : 
    4304                 :          0 :         return last;
    4305                 :            : }
    4306                 :            : /* css_leftmost_descendant - follow first children down from @pos; returns @pos itself if it has no child */
    4307                 :            : static struct cgroup_subsys_state *
    4308                 :            : css_leftmost_descendant(struct cgroup_subsys_state *pos)
    4309                 :            : {
    4310                 :            :         struct cgroup_subsys_state *last;
    4311                 :            : 
    4312                 :            :         do {
    4313                 :            :                 last = pos;
    4314                 :          3 :                 pos = css_next_child(NULL, pos); /* first child of @last, or NULL */
    4315                 :          3 :         } while (pos);
    4316                 :            : 
    4317                 :          3 :         return last;
    4318                 :            : }
    4319                 :            : 
    4320                 :            : /**
    4321                 :            :  * css_next_descendant_post - find the next descendant for post-order walk
    4322                 :            :  * @pos: the current position (%NULL to initiate traversal)
    4323                 :            :  * @root: css whose descendants to walk
    4324                 :            :  *
    4325                 :            :  * To be used by css_for_each_descendant_post().  Find the next descendant
    4326                 :            :  * to visit for post-order traversal of @root's descendants.  @root is
    4327                 :            :  * included in the iteration and the last node to be visited.
    4328                 :            :  *
    4329                 :            :  * While this function requires cgroup_mutex or RCU read locking, it
    4330                 :            :  * doesn't require the whole traversal to be contained in a single critical
    4331                 :            :  * section.  This function will return the correct next descendant as long
    4332                 :            :  * as both @pos and @root are accessible and @pos is a descendant of
    4333                 :            :  * @root.
    4334                 :            :  *
    4335                 :            :  * If a subsystem synchronizes ->css_online() and the start of iteration, a
    4336                 :            :  * css which finished ->css_online() is guaranteed to be visible in the
    4337                 :            :  * future iterations and will stay visible until the last reference is put.
    4338                 :            :  * A css which hasn't finished ->css_online() or already finished
    4339                 :            :  * ->css_offline() may show up during traversal.  It's each subsystem's
    4340                 :            :  * responsibility to synchronize against on/offlining.
    4341                 :            :  */
    4342                 :            : struct cgroup_subsys_state *
    4343                 :          3 : css_next_descendant_post(struct cgroup_subsys_state *pos,
    4344                 :            :                          struct cgroup_subsys_state *root)
    4345                 :            : {
    4346                 :            :         struct cgroup_subsys_state *next;
    4347                 :            : 
    4348                 :            :         cgroup_assert_mutex_or_rcu_locked();
    4349                 :            : 
    4350                 :            :         /* if first iteration, visit leftmost descendant which may be @root */
    4351                 :          3 :         if (!pos)
    4352                 :          3 :                 return css_leftmost_descendant(root);
    4353                 :            : 
    4354                 :            :         /* if we visited @root, we're done */
    4355                 :          3 :         if (pos == root)
    4356                 :            :                 return NULL;
    4357                 :            : 
    4358                 :            :         /* if there's an unvisited sibling, visit its leftmost descendant */
    4359                 :          0 :         next = css_next_child(pos, pos->parent);
    4360                 :          0 :         if (next)
    4361                 :          0 :                 return css_leftmost_descendant(next);
    4362                 :            : 
    4363                 :            :         /* no sibling left, visit parent */
    4364                 :          0 :         return pos->parent;
    4365                 :            : }
    4366                 :            : 
    4367                 :            : /**
    4368                 :            :  * css_has_online_children - does a css have online children
    4369                 :            :  * @css: the target css
    4370                 :            :  *
    4371                 :            :  * Returns %true if @css has any online children; otherwise, %false.  This
    4372                 :            :  * function can be called from any context but the caller is responsible
    4373                 :            :  * for synchronizing against on/offlining as necessary.
                         :            :  *
                         :            :  * The children are walked between rcu_read_lock() and rcu_read_unlock()
                         :            :  * and the walk stops at the first child which has %CSS_ONLINE set.
    4374                 :            :  */
    4375                 :          3 : bool css_has_online_children(struct cgroup_subsys_state *css)
    4376                 :            : {
    4377                 :            :         struct cgroup_subsys_state *child;
    4378                 :            :         bool ret = false;
    4379                 :            : 
    4380                 :            :         rcu_read_lock();
    4381                 :          3 :         css_for_each_child(child, css) {
    4382                 :          0 :                 if (child->flags & CSS_ONLINE) {
    4383                 :            :                         ret = true;
    4384                 :            :                         break;
    4385                 :            :                 }
    4386                 :            :         }
    4387                 :            :         rcu_read_unlock();
    4388                 :          3 :         return ret;
    4389                 :            : }
    4390                 :            : /*
                         :            :  * css_task_iter_next_css_set - step a task iterator to its next css_set
                         :            :  * @it: the iterator to step
                         :            :  *
                         :            :  * While a threaded css_set walk is in progress (@it->tcset_pos is set),
                         :            :  * return the next entry on that list; once the list wraps back to
                         :            :  * @it->tcset_head, fall through to the main list.  On the main list,
                         :            :  * return %NULL and clear @it->cset_pos when the walk wraps back to
                         :            :  * @it->cset_head.  With %CSS_TASK_ITER_THREADED, the returned css_set is
                         :            :  * additionally pinned as @it->cur_dcset and a walk of its threaded_csets
                         :            :  * list is set up for the following calls.
                         :            :  *
                         :            :  * Must be called with css_set_lock held.
                         :            :  */
    4391                 :          3 : static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
    4392                 :            : {
    4393                 :            :         struct list_head *l;
    4394                 :            :         struct cgrp_cset_link *link;
    4395                 :            :         struct css_set *cset;
    4396                 :            : 
    4397                 :            :         lockdep_assert_held(&css_set_lock);
    4398                 :            : 
    4399                 :            :         /* find the next threaded cset */
    4400                 :          3 :         if (it->tcset_pos) {
    4401                 :          3 :                 l = it->tcset_pos->next;
    4402                 :            : 
    4403                 :          3 :                 if (l != it->tcset_head) {
    4404                 :          0 :                         it->tcset_pos = l;
    4405                 :          0 :                         return container_of(l, struct css_set,
    4406                 :            :                                             threaded_csets_node);
    4407                 :            :                 }
    4408                 :            : 
    4409                 :          3 :                 it->tcset_pos = NULL;
    4410                 :            :         }
    4411                 :            : 
    4412                 :            :         /* find the next cset; wrapping back to the list head ends the walk */
    4413                 :          3 :         l = it->cset_pos;
    4414                 :          3 :         l = l->next;
    4415                 :          3 :         if (l == it->cset_head) {
    4416                 :          3 :                 it->cset_pos = NULL;
    4417                 :          3 :                 return NULL;
    4418                 :            :         }
    4419                 :            : 
    4420                 :          3 :         if (it->ss) {
    4421                 :          0 :                 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
    4422                 :            :         } else {
    4423                 :            :                 link = list_entry(l, struct cgrp_cset_link, cset_link);
    4424                 :          3 :                 cset = link->cset;
    4425                 :            :         }
    4426                 :            : 
    4427                 :          3 :         it->cset_pos = l;
    4428                 :            : 
    4429                 :            :         /* initialize threaded css_set walking */
    4430                 :          3 :         if (it->flags & CSS_TASK_ITER_THREADED) {
    4431                 :          3 :                 if (it->cur_dcset)
    4432                 :          0 :                         put_css_set_locked(it->cur_dcset);
    4433                 :          3 :                 it->cur_dcset = cset;
    4434                 :            :                 get_css_set(cset);
    4435                 :            : 
    4436                 :          3 :                 it->tcset_head = &cset->threaded_csets;
    4437                 :          3 :                 it->tcset_pos = &cset->threaded_csets;
    4438                 :            :         }
    4439                 :            : 
    4440                 :          3 :         return cset;
    4441                 :            : }
    4442                 :            : 
    4443                 :            : /**
    4444                 :            :  * css_task_iter_advance_css_set - advance a task iterator to the next css_set
    4445                 :            :  * @it: the iterator to advance
    4446                 :            :  *
    4447                 :            :  * Advance @it to the next css_set to walk.
                         :            :  *
                         :            :  * css_sets which are neither populated nor have dying tasks are skipped.
                         :            :  * If no css_set is left, @it->task_pos is set to %NULL.  Otherwise,
                         :            :  * @it->task_pos is pointed at the first entry of the first non-empty list
                         :            :  * among ->tasks and ->mg_tasks, falling back to ->dying_tasks, and @it is
                         :            :  * moved from the task_iters list of the previous css_set to that of the
                         :            :  * new one.  Must be called with css_set_lock held.
    4448                 :            :  */
    4449                 :          3 : static void css_task_iter_advance_css_set(struct css_task_iter *it)
    4450                 :            : {
    4451                 :            :         struct css_set *cset;
    4452                 :            : 
    4453                 :            :         lockdep_assert_held(&css_set_lock);
    4454                 :            : 
    4455                 :            :         /* Advance to the next css_set which is populated or has dying tasks */
    4456                 :            :         do {
    4457                 :          3 :                 cset = css_task_iter_next_css_set(it);
    4458                 :          3 :                 if (!cset) {
    4459                 :          3 :                         it->task_pos = NULL;
    4460                 :          3 :                         return;
    4461                 :            :                 }
    4462                 :          3 :         } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
    4463                 :            : 
    4464                 :          3 :         if (!list_empty(&cset->tasks)) {
    4465                 :          3 :                 it->task_pos = cset->tasks.next;
    4466                 :          3 :                 it->cur_tasks_head = &cset->tasks;
    4467                 :          3 :         } else if (!list_empty(&cset->mg_tasks)) {
    4468                 :          0 :                 it->task_pos = cset->mg_tasks.next;
    4469                 :          0 :                 it->cur_tasks_head = &cset->mg_tasks;
    4470                 :            :         } else {
    4471                 :          3 :                 it->task_pos = cset->dying_tasks.next;
    4472                 :          3 :                 it->cur_tasks_head = &cset->dying_tasks;
    4473                 :            :         }
    4474                 :            : 
    4475                 :          3 :         it->tasks_head = &cset->tasks;
    4476                 :          3 :         it->mg_tasks_head = &cset->mg_tasks;
    4477                 :          3 :         it->dying_tasks_head = &cset->dying_tasks;
    4478                 :            : 
    4479                 :            :         /*
    4480                 :            :          * We don't keep css_sets locked across iteration steps and thus
    4481                 :            :          * need to take steps to ensure that iteration can be resumed after
    4482                 :            :          * the lock is re-acquired.  Iteration is performed at two levels -
    4483                 :            :          * css_sets and tasks in them.
    4484                 :            :          *
    4485                 :            :          * Once created, a css_set never leaves its cgroup lists, so a
    4486                 :            :          * pinned css_set is guaranteed to stay put and we can resume
    4487                 :            :          * iteration afterwards.
    4488                 :            :          *
    4489                 :            :          * Tasks may leave @cset across iteration steps.  This is resolved
    4490                 :            :          * by registering each iterator with the css_set currently being
    4491                 :            :          * walked and making css_set_move_task() advance iterators whose
    4492                 :            :          * next task is leaving.
    4493                 :            :          */
    4494                 :          3 :         if (it->cur_cset) {
    4495                 :            :                 list_del(&it->iters_node);
    4496                 :          0 :                 put_css_set_locked(it->cur_cset);
    4497                 :            :         }
    4498                 :            :         get_css_set(cset);
    4499                 :          3 :         it->cur_cset = cset;
    4500                 :          3 :         list_add(&it->iters_node, &cset->task_iters);
    4501                 :            : }
    4502                 :            : /*
                         :            :  * css_task_iter_skip - move a task iterator past a given task
                         :            :  * @it: the iterator to update
                         :            :  * @task: the task to skip
                         :            :  *
                         :            :  * If @it is currently positioned on @task, move the position to the
                         :            :  * following list entry and set %CSS_TASK_ITER_SKIPPED.  The flag tells
                         :            :  * css_task_iter_advance() that the position has already been stepped.
                         :            :  * Does nothing if @it is positioned elsewhere.
                         :            :  *
                         :            :  * Must be called with css_set_lock held.
                         :            :  */
    4503                 :            : static void css_task_iter_skip(struct css_task_iter *it,
    4504                 :            :                                struct task_struct *task)
    4505                 :            : {
    4506                 :            :         lockdep_assert_held(&css_set_lock);
    4507                 :            : 
    4508                 :          3 :         if (it->task_pos == &task->cg_list) {
    4509                 :          0 :                 it->task_pos = it->task_pos->next;
    4510                 :          0 :                 it->flags |= CSS_TASK_ITER_SKIPPED;
    4511                 :            :         }
    4512                 :            : }
    4513                 :            : /*
                         :            :  * css_task_iter_advance - advance a task iterator to the next task to report
                         :            :  * @it: the iterator to advance
                         :            :  *
                         :            :  * Step @it->task_pos once and keep stepping until it points at a task
                         :            :  * the iterator should report, or becomes %NULL because no css_set is
                         :            :  * left.  With %CSS_TASK_ITER_PROCS, only thread group leaders are
                         :            :  * accepted, and a leader on the dying list only while its signal->live
                         :            :  * count is non-zero.  Without it, every task on the dying list is skipped.
                         :            :  *
                         :            :  * Must be called with css_set_lock held.
                         :            :  */
    4514                 :          3 : static void css_task_iter_advance(struct css_task_iter *it)
    4515                 :            : {
    4516                 :            :         struct task_struct *task;
    4517                 :            : 
    4518                 :            :         lockdep_assert_held(&css_set_lock);
    4519                 :            : repeat:
    4520                 :          3 :         if (it->task_pos) {
    4521                 :            :                 /*
    4522                 :            :                  * Advance iterator to find next entry.  cset->tasks is
    4523                 :            :                  * consumed first, then ->mg_tasks and then ->dying_tasks.
    4524                 :            :                  * After ->dying_tasks, we move onto the next cset.
                         :            :                  *
                         :            :                  * If css_task_iter_skip() already stepped the position,
                         :            :                  * only clear the SKIPPED flag instead of stepping again.
    4525                 :            :                  */
    4526                 :          3 :                 if (it->flags & CSS_TASK_ITER_SKIPPED)
    4527                 :          0 :                         it->flags &= ~CSS_TASK_ITER_SKIPPED;
    4528                 :            :                 else
    4529                 :          3 :                         it->task_pos = it->task_pos->next;
    4530                 :            : 
    4531                 :          3 :                 if (it->task_pos == it->tasks_head) {
    4532                 :          3 :                         it->task_pos = it->mg_tasks_head->next;
    4533                 :          3 :                         it->cur_tasks_head = it->mg_tasks_head;
    4534                 :            :                 }
    4535                 :          3 :                 if (it->task_pos == it->mg_tasks_head) {
    4536                 :          3 :                         it->task_pos = it->dying_tasks_head->next;
    4537                 :          3 :                         it->cur_tasks_head = it->dying_tasks_head;
    4538                 :            :                 }
    4539                 :          3 :                 if (it->task_pos == it->dying_tasks_head)
    4540                 :          3 :                         css_task_iter_advance_css_set(it);
    4541                 :            :         } else {
    4542                 :            :                 /* called from start, proceed to the first cset */
    4543                 :          3 :                 css_task_iter_advance_css_set(it);
    4544                 :            :         }
    4545                 :            : 
    4546                 :          3 :         if (!it->task_pos)
    4547                 :          3 :                 return;
    4548                 :            : 
    4549                 :            :         task = list_entry(it->task_pos, struct task_struct, cg_list);
    4550                 :            : 
    4551                 :          3 :         if (it->flags & CSS_TASK_ITER_PROCS) {
    4552                 :            :                 /* if PROCS, skip over tasks which aren't group leaders */
    4553                 :          3 :                 if (!thread_group_leader(task))
    4554                 :            :                         goto repeat;
    4555                 :            : 
    4556                 :            :                 /* and dying leaders w/o live member threads */
    4557                 :          3 :                 if (it->cur_tasks_head == it->dying_tasks_head &&
    4558                 :          3 :                     !atomic_read(&task->signal->live))
    4559                 :            :                         goto repeat;
    4560                 :            :         } else {
    4561                 :            :                 /* skip all dying ones */
    4562                 :          0 :                 if (it->cur_tasks_head == it->dying_tasks_head)
    4563                 :            :                         goto repeat;
    4564                 :            :         }
    4565                 :            : }
    4566                 :            : 
    4567                 :            : /**
    4568                 :            :  * css_task_iter_start - initiate task iteration
    4569                 :            :  * @css: the css to walk tasks of
    4570                 :            :  * @flags: CSS_TASK_ITER_* flags
    4571                 :            :  * @it: the task iterator to use
    4572                 :            :  *
    4573                 :            :  * Initiate iteration through the tasks of @css.  The caller can call
    4574                 :            :  * css_task_iter_next() to walk through the tasks until the function
    4575                 :            :  * returns NULL.  On completion of iteration, css_task_iter_end() must be
    4576                 :            :  * called.
                         :            :  *
                         :            :  * @it is zeroed before use.  If @css belongs to a subsystem, the css_sets
                         :            :  * on that subsystem's entry of @css->cgroup->e_csets[] are walked;
                         :            :  * otherwise, the ones linked on @css->cgroup->cset_links.  @it is advanced
                         :            :  * to the first task under css_set_lock before this function returns.
    4577                 :            :  */
    4578                 :          3 : void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
    4579                 :            :                          struct css_task_iter *it)
    4580                 :            : {
    4581                 :            :         /* no one should try to iterate before mounting cgroups */
    4582                 :          3 :         WARN_ON_ONCE(!use_task_css_set_links);
    4583                 :            : 
    4584                 :          3 :         memset(it, 0, sizeof(*it));
    4585                 :            : 
    4586                 :            :         spin_lock_irq(&css_set_lock);
    4587                 :            : 
    4588                 :          3 :         it->ss = css->ss;
    4589                 :          3 :         it->flags = flags;
    4590                 :            : 
    4591                 :          3 :         if (it->ss)
    4592                 :          0 :                 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
    4593                 :            :         else
    4594                 :          3 :                 it->cset_pos = &css->cgroup->cset_links;
    4595                 :            : 
    4596                 :          3 :         it->cset_head = it->cset_pos;
    4597                 :            : 
    4598                 :          3 :         css_task_iter_advance(it);
    4599                 :            : 
    4600                 :            :         spin_unlock_irq(&css_set_lock);
    4601                 :          3 : }
    4602                 :            : 
    4603                 :            : /**
    4604                 :            :  * css_task_iter_next - return the next task for the iterator
    4605                 :            :  * @it: the task iterator being iterated
    4606                 :            :  *
    4607                 :            :  * The "next" function for task iteration.  @it should have been
    4608                 :            :  * initialized via css_task_iter_start().  Returns NULL when the iteration
    4609                 :            :  * reaches the end.
                         :            :  *
                         :            :  * The returned task is pinned with get_task_struct() and remembered in
                         :            :  * @it->cur_task; the reference is dropped by the next call to this
                         :            :  * function or by css_task_iter_end().
    4610                 :            :  */
    4611                 :          3 : struct task_struct *css_task_iter_next(struct css_task_iter *it)
    4612                 :            : {
    4613                 :          3 :         if (it->cur_task) {
    4614                 :          3 :                 put_task_struct(it->cur_task);
    4615                 :          3 :                 it->cur_task = NULL;
    4616                 :            :         }
    4617                 :            : 
    4618                 :            :         spin_lock_irq(&css_set_lock);
    4619                 :            : 
    4620                 :            :         /* @it may be half-advanced by skips, finish advancing */
    4621                 :          3 :         if (it->flags & CSS_TASK_ITER_SKIPPED)
    4622                 :          0 :                 css_task_iter_advance(it);
    4623                 :            : 
    4624                 :          3 :         if (it->task_pos) {
    4625                 :          3 :                 it->cur_task = list_entry(it->task_pos, struct task_struct,
    4626                 :            :                                           cg_list);
    4627                 :            :                 get_task_struct(it->cur_task);
    4628                 :          3 :                 css_task_iter_advance(it);
    4629                 :            :         }
    4630                 :            : 
    4631                 :            :         spin_unlock_irq(&css_set_lock);
    4632                 :            : 
    4633                 :          3 :         return it->cur_task;
    4634                 :            : }
    4635                 :            : 
    4636                 :            : /**
    4637                 :            :  * css_task_iter_end - finish task iteration
    4638                 :            :  * @it: the task iterator to finish
    4639                 :            :  *
    4640                 :            :  * Finish task iteration started by css_task_iter_start().
                         :            :  *
                         :            :  * Removes @it from the task_iters list it was added to and drops the
                         :            :  * references held on @it->cur_cset, @it->cur_dcset and @it->cur_task,
                         :            :  * whichever of them are set.
    4641                 :            :  */
    4642                 :          3 : void css_task_iter_end(struct css_task_iter *it)
    4643                 :            : {
    4644                 :          3 :         if (it->cur_cset) {
    4645                 :            :                 spin_lock_irq(&css_set_lock);
    4646                 :            :                 list_del(&it->iters_node);
    4647                 :          3 :                 put_css_set_locked(it->cur_cset);
    4648                 :            :                 spin_unlock_irq(&css_set_lock);
    4649                 :            :         }
    4650                 :            : 
    4651                 :          3 :         if (it->cur_dcset)
    4652                 :          3 :                 put_css_set(it->cur_dcset);
    4653                 :            : 
    4654                 :          3 :         if (it->cur_task)
    4655                 :          0 :                 put_task_struct(it->cur_task);
    4656                 :          3 : }
    4657                 :            : /*
                         :            :  * cgroup_procs_release - tear down the task iterator of an open file
                         :            :  * @of: the open file being released
                         :            :  *
                         :            :  * If an iterator was stored in @of->priv, end it with css_task_iter_end()
                         :            :  * and free it.
                         :            :  */
    4658                 :          3 : static void cgroup_procs_release(struct kernfs_open_file *of)
    4659                 :            : {
    4660                 :          3 :         if (of->priv) {
    4661                 :          3 :                 css_task_iter_end(of->priv);
    4662                 :          3 :                 kfree(of->priv);
    4663                 :            :         }
    4664                 :          3 : }
    4665                 :            : /*
                         :            :  * cgroup_procs_next - fetch the next task for a task-listing read
                         :            :  * @s: the seq_file; its ->private is the kernfs_open_file
                         :            :  * @v: unused
                         :            :  * @pos: position to increment, may be %NULL
                         :            :  *
                         :            :  * Increments *@pos when @pos is given and returns the result of
                         :            :  * css_task_iter_next() on the iterator stored in of->priv.
                         :            :  * __cgroup_procs_start() also calls this with %NULL @v and @pos.
                         :            :  */
    4666                 :          3 : static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
    4667                 :            : {
    4668                 :          3 :         struct kernfs_open_file *of = s->private;
    4669                 :          3 :         struct css_task_iter *it = of->priv;
    4670                 :            : 
    4671                 :          3 :         if (pos)
    4672                 :          3 :                 (*pos)++;
    4673                 :            : 
    4674                 :          3 :         return css_task_iter_next(it);
    4675                 :            : }
    4676                 :            : /*
                         :            :  * __cgroup_procs_start - common start routine for task-listing reads
                         :            :  * @s: the seq_file; its ->private is the kernfs_open_file
                         :            :  * @pos: the read position
                         :            :  * @iter_flags: CSS_TASK_ITER_* flags passed to css_task_iter_start()
                         :            :  *
                         :            :  * If no iterator is stored in of->priv yet, one is allocated, stored and
                         :            :  * started; a non-zero *@pos in that state triggers a one-time warning and
                         :            :  * fails with -EINVAL.  If an iterator exists and *@pos is zero, the
                         :            :  * iteration is restarted.  Otherwise the iterator's current task is
                         :            :  * returned so that the read continues where it stopped.
                         :            :  *
                         :            :  * Returns the task to show, the result of cgroup_procs_next() for a
                         :            :  * fresh or restarted iteration, or an ERR_PTR() of -EINVAL or -ENOMEM.
                         :            :  */
    4677                 :          3 : static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
    4678                 :            :                                   unsigned int iter_flags)
    4679                 :            : {
    4680                 :          3 :         struct kernfs_open_file *of = s->private;
    4681                 :          3 :         struct cgroup *cgrp = seq_css(s)->cgroup;
    4682                 :          3 :         struct css_task_iter *it = of->priv;
    4683                 :            : 
    4684                 :            :         /*
    4685                 :            :          * When a seq_file is seeked, it's always traversed sequentially
    4686                 :            :          * from position 0, so we can simply keep iterating on !0 *pos.
    4687                 :            :          */
    4688                 :          3 :         if (!it) {
    4689                 :          3 :                 if (WARN_ON_ONCE((*pos)))
    4690                 :            :                         return ERR_PTR(-EINVAL);
    4691                 :            : 
    4692                 :          3 :                 it = kzalloc(sizeof(*it), GFP_KERNEL);
    4693                 :          3 :                 if (!it)
    4694                 :            :                         return ERR_PTR(-ENOMEM);
    4695                 :          3 :                 of->priv = it;
    4696                 :          3 :                 css_task_iter_start(&cgrp->self, iter_flags, it);
    4697                 :          3 :         } else if (!(*pos)) {
    4698                 :          0 :                 css_task_iter_end(it);
    4699                 :          0 :                 css_task_iter_start(&cgrp->self, iter_flags, it);
    4700                 :            :         } else
    4701                 :          3 :                 return it->cur_task;
    4702                 :            : 
    4703                 :          3 :         return cgroup_procs_next(s, NULL, NULL);
    4704                 :            : }
    4705                 :            : /*
                         :            :  * cgroup_procs_start - start routine for reads of cgroup.procs
                         :            :  * @s: the seq_file being read
                         :            :  * @pos: the read position
                         :            :  *
                         :            :  * Returns ERR_PTR(-EOPNOTSUPP) if the cgroup is threaded; otherwise the
                         :            :  * result of __cgroup_procs_start() with %CSS_TASK_ITER_PROCS and
                         :            :  * %CSS_TASK_ITER_THREADED set.
                         :            :  */
    4706                 :          3 : static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
    4707                 :            : {
    4708                 :          3 :         struct cgroup *cgrp = seq_css(s)->cgroup;
    4709                 :            : 
    4710                 :            :         /*
    4711                 :            :          * All processes of a threaded subtree belong to the domain cgroup
    4712                 :            :          * of the subtree.  Only threads can be distributed across the
    4713                 :            :          * subtree.  Reject reads on cgroup.procs in the subtree proper.
    4714                 :            :          * They're always empty anyway.
    4715                 :            :          */
    4716                 :          3 :         if (cgroup_is_threaded(cgrp))
    4717                 :            :                 return ERR_PTR(-EOPNOTSUPP);
    4718                 :            : 
    4719                 :          3 :         return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
    4720                 :            :                                             CSS_TASK_ITER_THREADED);
    4721                 :            : }
    4722                 :            : /*
                         :            :  * cgroup_procs_show - print one entry of a task listing
                         :            :  * @s: the seq_file to print to
                         :            :  * @v: the task to print, as returned by the start/next routines
                         :            :  *
                         :            :  * Prints the task_pid_vnr() value of @v in decimal followed by a newline.
                         :            :  * Always returns 0.
                         :            :  */
    4723                 :          3 : static int cgroup_procs_show(struct seq_file *s, void *v)
    4724                 :            : {
    4725                 :          3 :         seq_printf(s, "%d\n", task_pid_vnr(v));
    4726                 :          3 :         return 0;
    4727                 :            : }
    4728                 :            : /*
                         :            :  * cgroup_procs_write_permission - may %current migrate between two cgroups
                         :            :  * @src_cgrp: the cgroup the target currently is in
                         :            :  * @dst_cgrp: the cgroup to migrate to
                         :            :  * @sb: the super_block used to look up the inode to check
                         :            :  *
                         :            :  * %current needs write permission on the procs file of the closest
                         :            :  * common ancestor of @src_cgrp and @dst_cgrp.  If %CGRP_ROOT_NS_DELEGATE
                         :            :  * is set on the default root, both cgroups must additionally be
                         :            :  * descendants of the root cgroup of %current's cgroup namespace.
                         :            :  *
                         :            :  * Must be called with cgroup_mutex held.  Returns 0 if the migration is
                         :            :  * allowed, -ENOMEM if the inode can't be obtained, the error from
                         :            :  * inode_permission(), or -ENOENT if the namespace check fails.
                         :            :  */
    4729                 :          3 : static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
    4730                 :            :                                          struct cgroup *dst_cgrp,
    4731                 :            :                                          struct super_block *sb)
    4732                 :            : {
    4733                 :          3 :         struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
    4734                 :            :         struct cgroup *com_cgrp = src_cgrp;
    4735                 :            :         struct inode *inode;
    4736                 :            :         int ret;
    4737                 :            : 
    4738                 :            :         lockdep_assert_held(&cgroup_mutex);
    4739                 :            : 
    4740                 :            :         /* find the common ancestor by walking up from @src_cgrp */
    4741                 :          3 :         while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
    4742                 :            :                 com_cgrp = cgroup_parent(com_cgrp);
    4743                 :            : 
    4744                 :            :         /* %current should be authorized to migrate to the common ancestor */
    4745                 :          3 :         inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
    4746                 :          3 :         if (!inode)
    4747                 :            :                 return -ENOMEM;
    4748                 :            : 
    4749                 :          3 :         ret = inode_permission(inode, MAY_WRITE);
    4750                 :          3 :         iput(inode);
    4751                 :          3 :         if (ret)
    4752                 :            :                 return ret;
    4753                 :            : 
    4754                 :            :         /*
    4755                 :            :          * If namespaces are delegation boundaries, %current must be able
    4756                 :            :          * to see both source and destination cgroups from its namespace.
    4757                 :            :          */
    4758                 :          3 :         if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
    4759                 :          3 :             (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
    4760                 :            :              !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
    4761                 :            :                 return -ENOENT;
    4762                 :            : 
    4763                 :          3 :         return 0;
    4764                 :            : }
    4765                 :            : /*
                         :            :  * cgroup_procs_write - handle a write to a cgroup's procs file
                         :            :  * @of: the open file being written; @of->kn identifies the destination
                         :            :  * @buf: the written data, handed to cgroup_procs_write_start()
                         :            :  * @nbytes: length of the write
                         :            :  * @off: unused
                         :            :  *
                         :            :  * Looks up the target task, checks with cgroup_procs_write_permission()
                         :            :  * that the move from its cgroup on the default root to the destination
                         :            :  * is allowed, and then attaches it with cgroup_attach_task().
                         :            :  *
                         :            :  * Returns @nbytes on success, -ENODEV if cgroup_kn_lock_live() fails, or
                         :            :  * the error from the task lookup, the permission check or the attach.
                         :            :  */
    4766                 :          3 : static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
    4767                 :            :                                   char *buf, size_t nbytes, loff_t off)
    4768                 :            : {
    4769                 :            :         struct cgroup *src_cgrp, *dst_cgrp;
    4770                 :            :         struct task_struct *task;
    4771                 :            :         ssize_t ret;
    4772                 :            : 
    4773                 :          3 :         dst_cgrp = cgroup_kn_lock_live(of->kn, false);
    4774                 :          3 :         if (!dst_cgrp)
    4775                 :            :                 return -ENODEV;
    4776                 :            : 
    4777                 :          3 :         task = cgroup_procs_write_start(buf, true);
    4778                 :            :         ret = PTR_ERR_OR_ZERO(task);
    4779                 :          3 :         if (ret)
    4780                 :            :                 goto out_unlock;
    4781                 :            : 
    4782                 :            :         /* find the source cgroup */
    4783                 :            :         spin_lock_irq(&css_set_lock);
    4784                 :            :         src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
    4785                 :            :         spin_unlock_irq(&css_set_lock);
    4786                 :            : 
    4787                 :          3 :         ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
    4788                 :          3 :                                             of->file->f_path.dentry->d_sb);
    4789                 :          3 :         if (ret)
    4790                 :            :                 goto out_finish;
    4791                 :            : 
    4792                 :          3 :         ret = cgroup_attach_task(dst_cgrp, task, true);
    4793                 :            : 
    4794                 :            : out_finish:
    4795                 :          3 :         cgroup_procs_write_finish(task);
    4796                 :            : out_unlock:
    4797                 :          3 :         cgroup_kn_unlock(of->kn);
    4798                 :            : 
    4799                 :          3 :         return ret ?: nbytes;
    4800                 :            : }
    4801                 :            : 
    4802                 :          0 : static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
    4803                 :            : {
    4804                 :          0 :         return __cgroup_procs_start(s, pos, 0);
    4805                 :            : }
    4806                 :            : 
    4807                 :          0 : static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
    4808                 :            :                                     char *buf, size_t nbytes, loff_t off)
    4809                 :            : {
    4810                 :            :         struct cgroup *src_cgrp, *dst_cgrp;
    4811                 :            :         struct task_struct *task;
    4812                 :            :         ssize_t ret;
    4813                 :            : 
    4814                 :            :         buf = strstrip(buf);
    4815                 :            : 
    4816                 :          0 :         dst_cgrp = cgroup_kn_lock_live(of->kn, false);
    4817                 :          0 :         if (!dst_cgrp)
    4818                 :            :                 return -ENODEV;
    4819                 :            : 
    4820                 :          0 :         task = cgroup_procs_write_start(buf, false);
    4821                 :            :         ret = PTR_ERR_OR_ZERO(task);
    4822                 :          0 :         if (ret)
    4823                 :            :                 goto out_unlock;
    4824                 :            : 
    4825                 :            :         /* find the source cgroup */
    4826                 :            :         spin_lock_irq(&css_set_lock);
    4827                 :            :         src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
    4828                 :            :         spin_unlock_irq(&css_set_lock);
    4829                 :            : 
    4830                 :            :         /* thread migrations follow the cgroup.procs delegation rule */
    4831                 :          0 :         ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
    4832                 :          0 :                                             of->file->f_path.dentry->d_sb);
    4833                 :          0 :         if (ret)
    4834                 :            :                 goto out_finish;
    4835                 :            : 
    4836                 :            :         /* and must be contained in the same domain */
    4837                 :            :         ret = -EOPNOTSUPP;
    4838                 :          0 :         if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
    4839                 :            :                 goto out_finish;
    4840                 :            : 
    4841                 :          0 :         ret = cgroup_attach_task(dst_cgrp, task, false);
    4842                 :            : 
    4843                 :            : out_finish:
    4844                 :          0 :         cgroup_procs_write_finish(task);
    4845                 :            : out_unlock:
    4846                 :          0 :         cgroup_kn_unlock(of->kn);
    4847                 :            : 
    4848                 :          0 :         return ret ?: nbytes;
    4849                 :            : }
    4850                 :            : 
    4851                 :            : /* cgroup core interface files for the default hierarchy */
    4852                 :            : static struct cftype cgroup_base_files[] = {
    4853                 :            :         {
    4854                 :            :                 .name = "cgroup.type",
    4855                 :            :                 .flags = CFTYPE_NOT_ON_ROOT,
    4856                 :            :                 .seq_show = cgroup_type_show,
    4857                 :            :                 .write = cgroup_type_write,
    4858                 :            :         },
    4859                 :            :         {
    4860                 :            :                 .name = "cgroup.procs",
    4861                 :            :                 .flags = CFTYPE_NS_DELEGATABLE,
    4862                 :            :                 .file_offset = offsetof(struct cgroup, procs_file),
    4863                 :            :                 .release = cgroup_procs_release,
    4864                 :            :                 .seq_start = cgroup_procs_start,
    4865                 :            :                 .seq_next = cgroup_procs_next,
    4866                 :            :                 .seq_show = cgroup_procs_show,
    4867                 :            :                 .write = cgroup_procs_write,
    4868                 :            :         },
    4869                 :            :         {
    4870                 :            :                 .name = "cgroup.threads",
    4871                 :            :                 .flags = CFTYPE_NS_DELEGATABLE,
    4872                 :            :                 .release = cgroup_procs_release,
    4873                 :            :                 .seq_start = cgroup_threads_start,
    4874                 :            :                 .seq_next = cgroup_procs_next,
    4875                 :            :                 .seq_show = cgroup_procs_show,
    4876                 :            :                 .write = cgroup_threads_write,
    4877                 :            :         },
    4878                 :            :         {
    4879                 :            :                 .name = "cgroup.controllers",
    4880                 :            :                 .seq_show = cgroup_controllers_show,
    4881                 :            :         },
    4882                 :            :         {
    4883                 :            :                 .name = "cgroup.subtree_control",
    4884                 :            :                 .flags = CFTYPE_NS_DELEGATABLE,
    4885                 :            :                 .seq_show = cgroup_subtree_control_show,
    4886                 :            :                 .write = cgroup_subtree_control_write,
    4887                 :            :         },
    4888                 :            :         {
    4889                 :            :                 .name = "cgroup.events",
    4890                 :            :                 .flags = CFTYPE_NOT_ON_ROOT,
    4891                 :            :                 .file_offset = offsetof(struct cgroup, events_file),
    4892                 :            :                 .seq_show = cgroup_events_show,
    4893                 :            :         },
    4894                 :            :         {
    4895                 :            :                 .name = "cgroup.max.descendants",
    4896                 :            :                 .seq_show = cgroup_max_descendants_show,
    4897                 :            :                 .write = cgroup_max_descendants_write,
    4898                 :            :         },
    4899                 :            :         {
    4900                 :            :                 .name = "cgroup.max.depth",
    4901                 :            :                 .seq_show = cgroup_max_depth_show,
    4902                 :            :                 .write = cgroup_max_depth_write,
    4903                 :            :         },
    4904                 :            :         {
    4905                 :            :                 .name = "cgroup.stat",
    4906                 :            :                 .seq_show = cgroup_stat_show,
    4907                 :            :         },
    4908                 :            :         {
    4909                 :            :                 .name = "cgroup.freeze",
    4910                 :            :                 .flags = CFTYPE_NOT_ON_ROOT,
    4911                 :            :                 .seq_show = cgroup_freeze_show,
    4912                 :            :                 .write = cgroup_freeze_write,
    4913                 :            :         },
    4914                 :            :         {
    4915                 :            :                 .name = "cpu.stat",
    4916                 :            :                 .flags = CFTYPE_NOT_ON_ROOT,
    4917                 :            :                 .seq_show = cpu_stat_show,
    4918                 :            :         },
    4919                 :            : #ifdef CONFIG_PSI
    4920                 :            :         {
    4921                 :            :                 .name = "io.pressure",
    4922                 :            :                 .seq_show = cgroup_io_pressure_show,
    4923                 :            :                 .write = cgroup_io_pressure_write,
    4924                 :            :                 .poll = cgroup_pressure_poll,
    4925                 :            :                 .release = cgroup_pressure_release,
    4926                 :            :         },
    4927                 :            :         {
    4928                 :            :                 .name = "memory.pressure",
    4929                 :            :                 .seq_show = cgroup_memory_pressure_show,
    4930                 :            :                 .write = cgroup_memory_pressure_write,
    4931                 :            :                 .poll = cgroup_pressure_poll,
    4932                 :            :                 .release = cgroup_pressure_release,
    4933                 :            :         },
    4934                 :            :         {
    4935                 :            :                 .name = "cpu.pressure",
    4936                 :            :                 .seq_show = cgroup_cpu_pressure_show,
    4937                 :            :                 .write = cgroup_cpu_pressure_write,
    4938                 :            :                 .poll = cgroup_pressure_poll,
    4939                 :            :                 .release = cgroup_pressure_release,
    4940                 :            :         },
    4941                 :            : #endif /* CONFIG_PSI */
    4942                 :            :         { }     /* terminate */
    4943                 :            : };
    4944                 :            : 
    4945                 :            : /*
    4946                 :            :  * css destruction is a four-stage process.
    4947                 :            :  *
    4948                 :            :  * 1. Destruction starts.  Killing of the percpu_ref is initiated.
    4949                 :            :  *    Implemented in kill_css().
    4950                 :            :  *
    4951                 :            :  * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
    4952                 :            :  *    and thus css_tryget_online() is guaranteed to fail, the css can be
    4953                 :            :  *    offlined by invoking offline_css().  After offlining, the base ref is
    4954                 :            :  *    put.  Implemented in css_killed_work_fn().
    4955                 :            :  *
    4956                 :            :  * 3. When the percpu_ref reaches zero, the only possible remaining
    4957                 :            :  *    accessors are inside RCU read sections.  css_release() schedules the
    4958                 :            :  *    RCU callback.
    4959                 :            :  *
    4960                 :            :  * 4. After the grace period, the css can be freed.  Implemented in
    4961                 :            :  *    css_free_rwork_fn().
    4962                 :            :  *
    4963                 :            :  * It is actually hairier because both steps 2 and 4 require process context
    4964                 :            :  * and thus involve punting to css->destroy_work, adding two additional
    4965                 :            :  * steps to the already complex sequence.
    4966                 :            :  */
    4967                 :          3 : static void css_free_rwork_fn(struct work_struct *work)
    4968                 :            : {
    4969                 :          3 :         struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
    4970                 :            :                                 struct cgroup_subsys_state, destroy_rwork);
    4971                 :          3 :         struct cgroup_subsys *ss = css->ss;
    4972                 :          3 :         struct cgroup *cgrp = css->cgroup;
    4973                 :            : 
    4974                 :          3 :         percpu_ref_exit(&css->refcnt);
    4975                 :            : 
    4976                 :          3 :         if (ss) {
    4977                 :            :                 /* css free path */
    4978                 :          3 :                 struct cgroup_subsys_state *parent = css->parent;
    4979                 :          3 :                 int id = css->id;
    4980                 :            : 
    4981                 :          3 :                 ss->css_free(css);
    4982                 :          3 :                 cgroup_idr_remove(&ss->css_idr, id);
    4983                 :            :                 cgroup_put(cgrp);
    4984                 :            : 
    4985                 :          3 :                 if (parent)
    4986                 :            :                         css_put(parent);
    4987                 :            :         } else {
    4988                 :            :                 /* cgroup free path */
    4989                 :          3 :                 atomic_dec(&cgrp->root->nr_cgrps);
    4990                 :          3 :                 cgroup1_pidlist_destroy_all(cgrp);
    4991                 :          3 :                 cancel_work_sync(&cgrp->release_agent_work);
    4992                 :            : 
    4993                 :          3 :                 if (cgroup_parent(cgrp)) {
    4994                 :            :                         /*
    4995                 :            :                          * We get a ref to the parent, and put the ref when
    4996                 :            :                          * this cgroup is being freed, so it's guaranteed
    4997                 :            :                          * that the parent won't be destroyed before its
    4998                 :            :                          * children.
    4999                 :            :                          */
    5000                 :            :                         cgroup_put(cgroup_parent(cgrp));
    5001                 :          3 :                         kernfs_put(cgrp->kn);
    5002                 :            :                         psi_cgroup_free(cgrp);
    5003                 :          3 :                         if (cgroup_on_dfl(cgrp))
    5004                 :          3 :                                 cgroup_rstat_exit(cgrp);
    5005                 :          3 :                         kfree(cgrp);
    5006                 :            :                 } else {
    5007                 :            :                         /*
    5008                 :            :                          * This is root cgroup's refcnt reaching zero,
    5009                 :            :                          * which indicates that the root should be
    5010                 :            :                          * released.
    5011                 :            :                          */
    5012                 :          0 :                         cgroup_destroy_root(cgrp->root);
    5013                 :            :                 }
    5014                 :            :         }
    5015                 :          3 : }
    5016                 :            : 
    5017                 :          3 : static void css_release_work_fn(struct work_struct *work)
    5018                 :            : {
    5019                 :            :         struct cgroup_subsys_state *css =
    5020                 :          3 :                 container_of(work, struct cgroup_subsys_state, destroy_work);
    5021                 :          3 :         struct cgroup_subsys *ss = css->ss;
    5022                 :          3 :         struct cgroup *cgrp = css->cgroup;
    5023                 :            : 
    5024                 :          3 :         mutex_lock(&cgroup_mutex);
    5025                 :            : 
    5026                 :          3 :         css->flags |= CSS_RELEASED;
    5027                 :            :         list_del_rcu(&css->sibling);
    5028                 :            : 
    5029                 :          3 :         if (ss) {
    5030                 :            :                 /* css release path */
    5031                 :          3 :                 if (!list_empty(&css->rstat_css_node)) {
    5032                 :          0 :                         cgroup_rstat_flush(cgrp);
    5033                 :            :                         list_del_rcu(&css->rstat_css_node);
    5034                 :            :                 }
    5035                 :            : 
    5036                 :          3 :                 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
    5037                 :          3 :                 if (ss->css_released)
    5038                 :          0 :                         ss->css_released(css);
    5039                 :            :         } else {
    5040                 :            :                 struct cgroup *tcgrp;
    5041                 :            : 
    5042                 :            :                 /* cgroup release path */
    5043                 :          3 :                 TRACE_CGROUP_PATH(release, cgrp);
    5044                 :            : 
    5045                 :          3 :                 if (cgroup_on_dfl(cgrp))
    5046                 :          3 :                         cgroup_rstat_flush(cgrp);
    5047                 :            : 
    5048                 :            :                 spin_lock_irq(&css_set_lock);
    5049                 :          3 :                 for (tcgrp = cgroup_parent(cgrp); tcgrp;
    5050                 :            :                      tcgrp = cgroup_parent(tcgrp))
    5051                 :          3 :                         tcgrp->nr_dying_descendants--;
    5052                 :            :                 spin_unlock_irq(&css_set_lock);
    5053                 :            : 
    5054                 :          3 :                 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
    5055                 :          3 :                 cgrp->id = -1;
    5056                 :            : 
    5057                 :            :                 /*
    5058                 :            :                  * There are two control paths which try to determine
    5059                 :            :                  * cgroup from dentry without going through kernfs -
    5060                 :            :                  * cgroupstats_build() and css_tryget_online_from_dir().
    5061                 :            :                  * Those are supported by RCU-protecting the clearing of the
    5062                 :            :                  * cgrp->kn->priv backpointer.
    5063                 :            :                  */
    5064                 :          3 :                 if (cgrp->kn)
    5065                 :            :                         RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
    5066                 :            :                                          NULL);
    5067                 :            :         }
    5068                 :            : 
    5069                 :          3 :         mutex_unlock(&cgroup_mutex);
    5070                 :            : 
    5071                 :          3 :         INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
    5072                 :          3 :         queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
    5073                 :          3 : }
    5074                 :            : 
    5075                 :          3 : static void css_release(struct percpu_ref *ref)
    5076                 :            : {
    5077                 :            :         struct cgroup_subsys_state *css =
    5078                 :            :                 container_of(ref, struct cgroup_subsys_state, refcnt);
    5079                 :            : 
    5080                 :          3 :         INIT_WORK(&css->destroy_work, css_release_work_fn);
    5081                 :          3 :         queue_work(cgroup_destroy_wq, &css->destroy_work);
    5082                 :          3 : }
    5083                 :            : 
    5084                 :          3 : static void init_and_link_css(struct cgroup_subsys_state *css,
    5085                 :            :                               struct cgroup_subsys *ss, struct cgroup *cgrp)
    5086                 :            : {
    5087                 :            :         lockdep_assert_held(&cgroup_mutex);
    5088                 :            : 
    5089                 :          3 :         cgroup_get_live(cgrp);
    5090                 :            : 
    5091                 :          3 :         memset(css, 0, sizeof(*css));
    5092                 :          3 :         css->cgroup = cgrp;
    5093                 :          3 :         css->ss = ss;
    5094                 :          3 :         css->id = -1;
    5095                 :          3 :         INIT_LIST_HEAD(&css->sibling);
    5096                 :          3 :         INIT_LIST_HEAD(&css->children);
    5097                 :          3 :         INIT_LIST_HEAD(&css->rstat_css_node);
    5098                 :          3 :         css->serial_nr = css_serial_nr_next++;
    5099                 :            :         atomic_set(&css->online_cnt, 0);
    5100                 :            : 
    5101                 :          3 :         if (cgroup_parent(cgrp)) {
    5102                 :          3 :                 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
    5103                 :            :                 css_get(css->parent);
    5104                 :            :         }
    5105                 :            : 
    5106                 :          3 :         if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
    5107                 :          0 :                 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
    5108                 :            : 
    5109                 :          3 :         BUG_ON(cgroup_css(cgrp, ss));
    5110                 :          3 : }
    5111                 :            : 
    5112                 :            : /* invoke ->css_online() on a new CSS and mark it online if successful */
    5113                 :          3 : static int online_css(struct cgroup_subsys_state *css)
    5114                 :            : {
    5115                 :          3 :         struct cgroup_subsys *ss = css->ss;
    5116                 :            :         int ret = 0;
    5117                 :            : 
    5118                 :            :         lockdep_assert_held(&cgroup_mutex);
    5119                 :            : 
    5120                 :          3 :         if (ss->css_online)
    5121                 :          3 :                 ret = ss->css_online(css);
    5122                 :          3 :         if (!ret) {
    5123                 :          3 :                 css->flags |= CSS_ONLINE;
    5124                 :          3 :                 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
    5125                 :            : 
    5126                 :          3 :                 atomic_inc(&css->online_cnt);
    5127                 :          3 :                 if (css->parent)
    5128                 :          3 :                         atomic_inc(&css->parent->online_cnt);
    5129                 :            :         }
    5130                 :          3 :         return ret;
    5131                 :            : }
    5132                 :            : 
    5133                 :            : /* if the CSS is online, invoke ->css_offline() on it and mark it offline */
    5134                 :          3 : static void offline_css(struct cgroup_subsys_state *css)
    5135                 :            : {
    5136                 :          3 :         struct cgroup_subsys *ss = css->ss;
    5137                 :            : 
    5138                 :            :         lockdep_assert_held(&cgroup_mutex);
    5139                 :            : 
    5140                 :          3 :         if (!(css->flags & CSS_ONLINE))
    5141                 :          3 :                 return;
    5142                 :            : 
    5143                 :          3 :         if (ss->css_offline)
    5144                 :          3 :                 ss->css_offline(css);
    5145                 :            : 
    5146                 :          3 :         css->flags &= ~CSS_ONLINE;
    5147                 :          3 :         RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
    5148                 :            : 
    5149                 :          3 :         wake_up_all(&css->cgroup->offline_waitq);
    5150                 :            : }
    5151                 :            : 
    5152                 :            : /**
    5153                 :            :  * css_create - create a cgroup_subsys_state
    5154                 :            :  * @cgrp: the cgroup new css will be associated with
    5155                 :            :  * @ss: the subsys of new css
    5156                 :            :  *
    5157                 :            :  * Create a new css associated with @cgrp - @ss pair.  On success, the new
    5158                 :            :  * css is online and installed in @cgrp.  This function doesn't create the
    5159                 :            :  * interface files.  Returns the new css on success, ERR_PTR(-errno) on failure.
    5160                 :            :  */
    5161                 :          3 : static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
    5162                 :            :                                               struct cgroup_subsys *ss)
    5163                 :            : {
    5164                 :            :         struct cgroup *parent = cgroup_parent(cgrp);
    5165                 :            :         struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
    5166                 :            :         struct cgroup_subsys_state *css;
    5167                 :            :         int err;
    5168                 :            : 
    5169                 :            :         lockdep_assert_held(&cgroup_mutex);
    5170                 :            : 
    5171                 :          3 :         css = ss->css_alloc(parent_css);
    5172                 :          3 :         if (!css)
    5173                 :            :                 css = ERR_PTR(-ENOMEM);
    5174                 :          3 :         if (IS_ERR(css))
    5175                 :            :                 return css;
    5176                 :            : 
    5177                 :          3 :         init_and_link_css(css, ss, cgrp);
    5178                 :            : 
    5179                 :          3 :         err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
    5180                 :          3 :         if (err)
    5181                 :            :                 goto err_free_css;
    5182                 :            : 
    5183                 :          3 :         err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
    5184                 :          3 :         if (err < 0)
    5185                 :            :                 goto err_free_css;
    5186                 :          3 :         css->id = err;
    5187                 :            : 
    5188                 :            :         /* @css is ready to be brought online now, make it visible */
    5189                 :          3 :         list_add_tail_rcu(&css->sibling, &parent_css->children);
    5190                 :          3 :         cgroup_idr_replace(&ss->css_idr, css, css->id);
    5191                 :            : 
    5192                 :          3 :         err = online_css(css);
    5193                 :          3 :         if (err)
    5194                 :            :                 goto err_list_del;
    5195                 :            : 
    5196                 :          3 :         if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
    5197                 :            :             cgroup_parent(parent)) {
    5198                 :          0 :                 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
    5199                 :            :                         current->comm, current->pid, ss->name);
    5200                 :          0 :                 if (!strcmp(ss->name, "memory"))
    5201                 :          0 :                         pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
    5202                 :          0 :                 ss->warned_broken_hierarchy = true;
    5203                 :            :         }
    5204                 :            : 
    5205                 :          3 :         return css;
    5206                 :            : 
    5207                 :            : err_list_del:
    5208                 :            :         list_del_rcu(&css->sibling);
    5209                 :            : err_free_css:
    5210                 :            :         list_del_rcu(&css->rstat_css_node);
    5211                 :          0 :         INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
    5212                 :          0 :         queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
    5213                 :          0 :         return ERR_PTR(err);
    5214                 :            : }
    5215                 :            : 
    5216                 :            : /*
    5217                 :            :  * The returned cgroup is fully initialized including its control mask, but
    5218                 :            :  * it isn't associated with its kernfs_node and doesn't have the control
    5219                 :            :  * mask applied.
    5220                 :            :  */
    5221                 :          3 : static struct cgroup *cgroup_create(struct cgroup *parent)
    5222                 :            : {
    5223                 :          3 :         struct cgroup_root *root = parent->root;
    5224                 :            :         struct cgroup *cgrp, *tcgrp;
    5225                 :          3 :         int level = parent->level + 1;
    5226                 :            :         int ret;
    5227                 :            : 
    5228                 :            :         /* allocate the cgroup and its ID, 0 is reserved for the root */
    5229                 :          3 :         cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
    5230                 :            :                        GFP_KERNEL);
    5231                 :          3 :         if (!cgrp)
    5232                 :            :                 return ERR_PTR(-ENOMEM);
    5233                 :            : 
    5234                 :          3 :         ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
    5235                 :          3 :         if (ret)
    5236                 :            :                 goto out_free_cgrp;
    5237                 :            : 
    5238                 :          3 :         if (cgroup_on_dfl(parent)) {
    5239                 :          3 :                 ret = cgroup_rstat_init(cgrp);
    5240                 :          3 :                 if (ret)
    5241                 :            :                         goto out_cancel_ref;
    5242                 :            :         }
    5243                 :            : 
    5244                 :            :         /*
    5245                 :            :          * Temporarily set the pointer to NULL, so idr_find() won't return
    5246                 :            :          * a half-baked cgroup.
    5247                 :            :          */
    5248                 :          3 :         cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
    5249                 :          3 :         if (cgrp->id < 0) {
    5250                 :            :                 ret = -ENOMEM;
    5251                 :            :                 goto out_stat_exit;
    5252                 :            :         }
    5253                 :            : 
    5254                 :          3 :         init_cgroup_housekeeping(cgrp);
    5255                 :            : 
    5256                 :          3 :         cgrp->self.parent = &parent->self;
    5257                 :          3 :         cgrp->root = root;
    5258                 :          3 :         cgrp->level = level;
    5259                 :            : 
    5260                 :            :         ret = psi_cgroup_alloc(cgrp);
    5261                 :            :         if (ret)
    5262                 :            :                 goto out_idr_free;
    5263                 :            : 
    5264                 :          3 :         ret = cgroup_bpf_inherit(cgrp);
    5265                 :          3 :         if (ret)
    5266                 :            :                 goto out_psi_free;
    5267                 :            : 
    5268                 :            :         /*
    5269                 :            :          * A new cgroup inherits the effective freeze counter, and
    5270                 :            :          * if the parent has to be frozen, the child has to be too.
    5271                 :            :          */
    5272                 :          3 :         cgrp->freezer.e_freeze = parent->freezer.e_freeze;
    5273                 :          3 :         if (cgrp->freezer.e_freeze) {
    5274                 :            :                 /*
    5275                 :            :                  * Set the CGRP_FREEZE flag, so that when a process is
    5276                 :            :                  * attached to the child cgroup, it becomes frozen.
    5277                 :            :                  * At this point the new cgroup is unpopulated, so we can
    5278                 :            :                  * consider it frozen immediately.
    5279                 :            :                  */
    5280                 :          0 :                 set_bit(CGRP_FREEZE, &cgrp->flags);
    5281                 :          0 :                 set_bit(CGRP_FROZEN, &cgrp->flags);
    5282                 :            :         }
    5283                 :            : 
    5284                 :            :         spin_lock_irq(&css_set_lock);
    5285                 :          3 :         for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
    5286                 :          3 :                 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
    5287                 :            : 
    5288                 :          3 :                 if (tcgrp != cgrp) {
    5289                 :          3 :                         tcgrp->nr_descendants++;
    5290                 :            : 
    5291                 :            :                         /*
    5292                 :            :                          * If the new cgroup is frozen, all ancestor cgroups
    5293                 :            :                          * get a new frozen descendant, but their state can't
    5294                 :            :                          * change because of this.
    5295                 :            :                          */
    5296                 :          3 :                         if (cgrp->freezer.e_freeze)
    5297                 :          0 :                                 tcgrp->freezer.nr_frozen_descendants++;
    5298                 :            :                 }
    5299                 :            :         }
    5300                 :            :         spin_unlock_irq(&css_set_lock);
    5301                 :            : 
    5302                 :          3 :         if (notify_on_release(parent))
    5303                 :          0 :                 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
    5304                 :            : 
    5305                 :          3 :         if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
    5306                 :          0 :                 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
    5307                 :            : 
    5308                 :          3 :         cgrp->self.serial_nr = css_serial_nr_next++;
    5309                 :            : 
    5310                 :            :         /* allocation complete, commit to creation */
    5311                 :          3 :         list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
    5312                 :          3 :         atomic_inc(&root->nr_cgrps);
    5313                 :          3 :         cgroup_get_live(parent);
    5314                 :            : 
    5315                 :            :         /*
    5316                 :            :          * @cgrp is now fully operational.  If something fails after this
    5317                 :            :          * point, it'll be released via the normal destruction path.
    5318                 :            :          */
    5319                 :          3 :         cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
    5320                 :            : 
    5321                 :            :         /*
    5322                 :            :          * On the default hierarchy, a child doesn't automatically inherit
    5323                 :            :          * subtree_control from the parent.  Each is configured manually.
    5324                 :            :          */
    5325                 :          3 :         if (!cgroup_on_dfl(cgrp))
    5326                 :          3 :                 cgrp->subtree_control = cgroup_control(cgrp);
    5327                 :            : 
    5328                 :          3 :         cgroup_propagate_control(cgrp);
    5329                 :            : 
    5330                 :          3 :         return cgrp;
    5331                 :            : 
    5332                 :            : out_psi_free:
    5333                 :            :         psi_cgroup_free(cgrp);
    5334                 :            : out_idr_free:
    5335                 :          0 :         cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
    5336                 :            : out_stat_exit:
    5337                 :          0 :         if (cgroup_on_dfl(parent))
    5338                 :          0 :                 cgroup_rstat_exit(cgrp);
    5339                 :            : out_cancel_ref:
    5340                 :          0 :         percpu_ref_exit(&cgrp->self.refcnt);
    5341                 :            : out_free_cgrp:
    5342                 :          0 :         kfree(cgrp);
    5343                 :          0 :         return ERR_PTR(ret);
    5344                 :            : }
    5345                 :            : 
    5346                 :            : static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
    5347                 :            : {
    5348                 :            :         struct cgroup *cgroup;
    5349                 :            :         int ret = false;
    5350                 :            :         int level = 1;
    5351                 :            : 
    5352                 :            :         lockdep_assert_held(&cgroup_mutex);
    5353                 :            : 
    5354                 :          3 :         for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
    5355                 :          3 :                 if (cgroup->nr_descendants >= cgroup->max_descendants)
    5356                 :            :                         goto fail;
    5357                 :            : 
    5358                 :          3 :                 if (level > cgroup->max_depth)
    5359                 :            :                         goto fail;
    5360                 :            : 
    5361                 :          3 :                 level++;
    5362                 :            :         }
    5363                 :            : 
    5364                 :            :         ret = true;
    5365                 :            : fail:
    5366                 :            :         return ret;
    5367                 :            : }
    5368                 :            : 
    5369                 :          3 : int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
    5370                 :            : {
    5371                 :            :         struct cgroup *parent, *cgrp;
    5372                 :            :         struct kernfs_node *kn;
    5373                 :            :         int ret;
    5374                 :            : 
    5375                 :            :         /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
    5376                 :          3 :         if (strchr(name, '\n'))
    5377                 :            :                 return -EINVAL;
    5378                 :            : 
    5379                 :          3 :         parent = cgroup_kn_lock_live(parent_kn, false);
    5380                 :          3 :         if (!parent)
    5381                 :            :                 return -ENODEV;
    5382                 :            : 
    5383                 :          3 :         if (!cgroup_check_hierarchy_limits(parent)) {
    5384                 :            :                 ret = -EAGAIN;
    5385                 :            :                 goto out_unlock;
    5386                 :            :         }
    5387                 :            : 
    5388                 :          3 :         cgrp = cgroup_create(parent);
    5389                 :          3 :         if (IS_ERR(cgrp)) {
    5390                 :            :                 ret = PTR_ERR(cgrp);
    5391                 :          0 :                 goto out_unlock;
    5392                 :            :         }
    5393                 :            : 
    5394                 :            :         /* create the directory */
    5395                 :          3 :         kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
    5396                 :          3 :         if (IS_ERR(kn)) {
    5397                 :            :                 ret = PTR_ERR(kn);
    5398                 :          0 :                 goto out_destroy;
    5399                 :            :         }
    5400                 :          3 :         cgrp->kn = kn;
    5401                 :            : 
    5402                 :            :         /*
    5403                 :            :          * This extra ref will be put in cgroup_free_fn() and guarantees
    5404                 :            :          * that @cgrp->kn is always accessible.
    5405                 :            :          */
    5406                 :          3 :         kernfs_get(kn);
    5407                 :            : 
    5408                 :          3 :         ret = cgroup_kn_set_ugid(kn);
    5409                 :          3 :         if (ret)
    5410                 :            :                 goto out_destroy;
    5411                 :            : 
    5412                 :          3 :         ret = css_populate_dir(&cgrp->self);
    5413                 :          3 :         if (ret)
    5414                 :            :                 goto out_destroy;
    5415                 :            : 
    5416                 :          3 :         ret = cgroup_apply_control_enable(cgrp);
    5417                 :          3 :         if (ret)
    5418                 :            :                 goto out_destroy;
    5419                 :            : 
    5420                 :          3 :         TRACE_CGROUP_PATH(mkdir, cgrp);
    5421                 :            : 
    5422                 :            :         /* let's create and online css's */
    5423                 :          3 :         kernfs_activate(kn);
    5424                 :            : 
    5425                 :            :         ret = 0;
    5426                 :          3 :         goto out_unlock;
    5427                 :            : 
    5428                 :            : out_destroy:
    5429                 :          0 :         cgroup_destroy_locked(cgrp);
    5430                 :            : out_unlock:
    5431                 :          3 :         cgroup_kn_unlock(parent_kn);
    5432                 :          3 :         return ret;
    5433                 :            : }
    5434                 :            : 
    5435                 :            : /*
    5436                 :            :  * This is called when the refcnt of a css is confirmed to be killed.
    5437                 :            :  * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
    5438                 :            :  * initiate destruction and put the css ref from kill_css().
    5439                 :            :  */
    5440                 :          3 : static void css_killed_work_fn(struct work_struct *work)
    5441                 :            : {
    5442                 :            :         struct cgroup_subsys_state *css =
    5443                 :          3 :                 container_of(work, struct cgroup_subsys_state, destroy_work);
    5444                 :            : 
    5445                 :          3 :         mutex_lock(&cgroup_mutex);
    5446                 :            : 
    5447                 :            :         do {
    5448                 :          3 :                 offline_css(css);
    5449                 :            :                 css_put(css);
    5450                 :            :                 /* @css can't go away while we're holding cgroup_mutex */
    5451                 :          3 :                 css = css->parent;
    5452                 :          3 :         } while (css && atomic_dec_and_test(&css->online_cnt));
    5453                 :            : 
    5454                 :          3 :         mutex_unlock(&cgroup_mutex);
    5455                 :          3 : }
    5456                 :            : 
    5457                 :            : /* css kill confirmation processing requires process context, bounce */
    5458                 :          3 : static void css_killed_ref_fn(struct percpu_ref *ref)
    5459                 :            : {
    5460                 :            :         struct cgroup_subsys_state *css =
    5461                 :            :                 container_of(ref, struct cgroup_subsys_state, refcnt);
    5462                 :            : 
    5463                 :          3 :         if (atomic_dec_and_test(&css->online_cnt)) {
    5464                 :          3 :                 INIT_WORK(&css->destroy_work, css_killed_work_fn);
    5465                 :          3 :                 queue_work(cgroup_destroy_wq, &css->destroy_work);
    5466                 :            :         }
    5467                 :          3 : }
    5468                 :            : 
    5469                 :            : /**
    5470                 :            :  * kill_css - destroy a css
    5471                 :            :  * @css: css to destroy
    5472                 :            :  *
    5473                 :            :  * This function initiates destruction of @css by removing cgroup interface
    5474                 :            :  * files and putting its base reference.  ->css_offline() will be invoked
    5475                 :            :  * asynchronously once css_tryget_online() is guaranteed to fail and when
    5476                 :            :  * the reference count reaches zero, @css will be released.
    5477                 :            :  */
    5478                 :          3 : static void kill_css(struct cgroup_subsys_state *css)
    5479                 :            : {
    5480                 :            :         lockdep_assert_held(&cgroup_mutex);
    5481                 :            : 
    5482                 :          3 :         if (css->flags & CSS_DYING)
    5483                 :          3 :                 return;
    5484                 :            : 
    5485                 :          3 :         css->flags |= CSS_DYING;
    5486                 :            : 
    5487                 :            :         /*
    5488                 :            :          * This must happen before css is disassociated with its cgroup.
    5489                 :            :          * See seq_css() for details.
    5490                 :            :          */
    5491                 :          3 :         css_clear_dir(css);
    5492                 :            : 
    5493                 :            :         /*
    5494                 :            :          * Killing would put the base ref, but we need to keep it alive
    5495                 :            :          * until after ->css_offline().
    5496                 :            :          */
    5497                 :            :         css_get(css);
    5498                 :            : 
    5499                 :            :         /*
    5500                 :            :          * cgroup core guarantees that, by the time ->css_offline() is
    5501                 :            :          * invoked, no new css reference will be given out via
    5502                 :            :          * css_tryget_online().  We can't simply call percpu_ref_kill() and
    5503                 :            :          * proceed to offlining css's because percpu_ref_kill() doesn't
    5504                 :            :          * guarantee that the ref is seen as killed on all CPUs on return.
    5505                 :            :          *
    5506                 :            :          * Use percpu_ref_kill_and_confirm() to get notifications as each
    5507                 :            :          * css is confirmed to be seen as killed on all CPUs.
    5508                 :            :          */
    5509                 :          3 :         percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
    5510                 :            : }
    5511                 :            : 
    5512                 :            : /**
    5513                 :            :  * cgroup_destroy_locked - the first stage of cgroup destruction
    5514                 :            :  * @cgrp: cgroup to be destroyed
    5515                 :            :  *
    5516                 :            :  * css's make use of percpu refcnts whose killing latency shouldn't be
    5517                 :            :  * exposed to userland and are RCU protected.  Also, cgroup core needs to
    5518                 :            :  * guarantee that css_tryget_online() won't succeed by the time
    5519                 :            :  * ->css_offline() is invoked.  To satisfy all the requirements,
    5520                 :            :  * destruction is implemented in the following two steps.
    5521                 :            :  *
    5522                 :            :  * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
    5523                 :            :  *     userland visible parts and start killing the percpu refcnts of
    5524                 :            :  *     css's.  Set up so that the next stage will be kicked off once all
    5525                 :            :  *     the percpu refcnts are confirmed to be killed.
    5526                 :            :  *
    5527                 :            :  * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
    5528                 :            :  *     rest of destruction.  Once all cgroup references are gone, the
    5529                 :            :  *     cgroup is RCU-freed.
    5530                 :            :  *
    5531                 :            :  * This function implements s1.  After this step, @cgrp is gone as far as
    5532                 :            :  * the userland is concerned and a new cgroup with the same name may be
    5533                 :            :  * created.  As cgroup doesn't care about the names internally, this
    5534                 :            :  * doesn't cause any problem.
    5535                 :            :  */
    5536                 :          3 : static int cgroup_destroy_locked(struct cgroup *cgrp)
    5537                 :            :         __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
    5538                 :            : {
    5539                 :            :         struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
    5540                 :            :         struct cgroup_subsys_state *css;
    5541                 :            :         struct cgrp_cset_link *link;
    5542                 :            :         int ssid;
    5543                 :            : 
    5544                 :            :         lockdep_assert_held(&cgroup_mutex);
    5545                 :            : 
    5546                 :            :         /*
    5547                 :            :          * Only migration can raise populated from zero and we're already
    5548                 :            :          * holding cgroup_mutex.
    5549                 :            :          */
    5550                 :          3 :         if (cgroup_is_populated(cgrp))
    5551                 :            :                 return -EBUSY;
    5552                 :            : 
    5553                 :            :         /*
    5554                 :            :          * Make sure there are no live children.  We can't test emptiness of
    5555                 :            :          * ->self.children as dead children linger on it while being
    5556                 :            :          * drained; otherwise, "rmdir parent/child parent" may fail.
    5557                 :            :          */
    5558                 :          3 :         if (css_has_online_children(&cgrp->self))
    5559                 :            :                 return -EBUSY;
    5560                 :            : 
    5561                 :            :         /*
    5562                 :            :          * Mark @cgrp and the associated csets dead.  The former prevents
    5563                 :            :          * further task migration and child creation by disabling
    5564                 :            :          * cgroup_lock_live_group().  The latter makes the csets ignored by
    5565                 :            :          * the migration path.
    5566                 :            :          */
    5567                 :          3 :         cgrp->self.flags &= ~CSS_ONLINE;
    5568                 :            : 
    5569                 :            :         spin_lock_irq(&css_set_lock);
    5570                 :          3 :         list_for_each_entry(link, &cgrp->cset_links, cset_link)
    5571                 :          3 :                 link->cset->dead = true;
    5572                 :            :         spin_unlock_irq(&css_set_lock);
    5573                 :            : 
    5574                 :            :         /* initiate massacre of all css's */
    5575                 :          3 :         for_each_css(css, ssid, cgrp)
    5576                 :          3 :                 kill_css(css);
    5577                 :            : 
    5578                 :            :         /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
    5579                 :          3 :         css_clear_dir(&cgrp->self);
    5580                 :          3 :         kernfs_remove(cgrp->kn);
    5581                 :            : 
    5582                 :          3 :         if (parent && cgroup_is_threaded(cgrp))
    5583                 :          0 :                 parent->nr_threaded_children--;
    5584                 :            : 
    5585                 :            :         spin_lock_irq(&css_set_lock);
    5586                 :          3 :         for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
    5587                 :          3 :                 tcgrp->nr_descendants--;
    5588                 :          3 :                 tcgrp->nr_dying_descendants++;
    5589                 :            :                 /*
    5590                 :            :                  * If the dying cgroup is frozen, decrease frozen descendants
    5591                 :            :                  * counters of ancestor cgroups.
    5592                 :            :                  */
    5593                 :          3 :                 if (test_bit(CGRP_FROZEN, &cgrp->flags))
    5594                 :          0 :                         tcgrp->freezer.nr_frozen_descendants--;
    5595                 :            :         }
    5596                 :            :         spin_unlock_irq(&css_set_lock);
    5597                 :            : 
    5598                 :          3 :         cgroup1_check_for_release(parent);
    5599                 :            : 
    5600                 :          3 :         cgroup_bpf_offline(cgrp);
    5601                 :            : 
    5602                 :            :         /* put the base reference */
    5603                 :          3 :         percpu_ref_kill(&cgrp->self.refcnt);
    5604                 :            : 
    5605                 :          3 :         return 0;
    5606                 :            : };
    5607                 :            : 
    5608                 :          3 : int cgroup_rmdir(struct kernfs_node *kn)
    5609                 :            : {
    5610                 :            :         struct cgroup *cgrp;
    5611                 :            :         int ret = 0;
    5612                 :            : 
    5613                 :          3 :         cgrp = cgroup_kn_lock_live(kn, false);
    5614                 :          3 :         if (!cgrp)
    5615                 :            :                 return 0;
    5616                 :            : 
    5617                 :          3 :         ret = cgroup_destroy_locked(cgrp);
    5618                 :          3 :         if (!ret)
    5619                 :          3 :                 TRACE_CGROUP_PATH(rmdir, cgrp);
    5620                 :            : 
    5621                 :          3 :         cgroup_kn_unlock(kn);
    5622                 :          3 :         return ret;
    5623                 :            : }
    5624                 :            : 
    5625                 :            : static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
    5626                 :            :         .show_options           = cgroup_show_options,
    5627                 :            :         .mkdir                  = cgroup_mkdir,
    5628                 :            :         .rmdir                  = cgroup_rmdir,
    5629                 :            :         .show_path              = cgroup_show_path,
    5630                 :            : };
    5631                 :            : 
    5632                 :          3 : static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
    5633                 :            : {
    5634                 :            :         struct cgroup_subsys_state *css;
    5635                 :            : 
    5636                 :            :         pr_debug("Initializing cgroup subsys %s\n", ss->name);
    5637                 :            : 
    5638                 :          3 :         mutex_lock(&cgroup_mutex);
    5639                 :            : 
    5640                 :            :         idr_init(&ss->css_idr);
    5641                 :          3 :         INIT_LIST_HEAD(&ss->cfts);
    5642                 :            : 
    5643                 :            :         /* Create the root cgroup state for this subsystem */
    5644                 :          3 :         ss->root = &cgrp_dfl_root;
    5645                 :          3 :         css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
    5646                 :            :         /* We don't handle early failures gracefully */
    5647                 :          3 :         BUG_ON(IS_ERR(css));
    5648                 :          3 :         init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
    5649                 :            : 
    5650                 :            :         /*
    5651                 :            :          * Root csses are never destroyed and we can't initialize
    5652                 :            :          * percpu_ref during early init.  Disable refcnting.
    5653                 :            :          */
    5654                 :          3 :         css->flags |= CSS_NO_REF;
    5655                 :            : 
    5656                 :          3 :         if (early) {
    5657                 :            :                 /* allocation can't be done safely during early init */
    5658                 :          3 :                 css->id = 1;
    5659                 :            :         } else {
    5660                 :          3 :                 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
    5661                 :          3 :                 BUG_ON(css->id < 0);
    5662                 :            :         }
    5663                 :            : 
    5664                 :            :         /* Update the init_css_set to contain a subsys
    5665                 :            :          * pointer to this state - since the subsystem is
    5666                 :            :          * newly registered, all tasks and hence the
    5667                 :            :          * init_css_set is in the subsystem's root cgroup. */
    5668                 :          3 :         init_css_set.subsys[ss->id] = css;
    5669                 :            : 
    5670                 :          3 :         have_fork_callback |= (bool)ss->fork << ss->id;
    5671                 :          3 :         have_exit_callback |= (bool)ss->exit << ss->id;
    5672                 :          3 :         have_release_callback |= (bool)ss->release << ss->id;
    5673                 :          3 :         have_canfork_callback |= (bool)ss->can_fork << ss->id;
    5674                 :            : 
    5675                 :            :         /* At system boot, before all subsystems have been
    5676                 :            :          * registered, no tasks have been forked, so we don't
    5677                 :            :          * need to invoke fork callbacks here. */
    5678                 :          3 :         BUG_ON(!list_empty(&init_task.tasks));
    5679                 :            : 
    5680                 :          3 :         BUG_ON(online_css(css));
    5681                 :            : 
    5682                 :          3 :         mutex_unlock(&cgroup_mutex);
    5683                 :          3 : }
    5684                 :            : 
    5685                 :            : /**
    5686                 :            :  * cgroup_init_early - cgroup initialization at system boot
    5687                 :            :  *
    5688                 :            :  * Set up cgrp_dfl_root, point init_task at init_css_set, give every
    5689                 :            :  * subsystem its id and name, and initialize those that request early init.
    5690                 :            :  */
    5691                 :          3 : int __init cgroup_init_early(void)
    5692                 :            : {
    5693                 :            :         static struct cgroup_fs_context __initdata ctx;
    5694                 :            :         struct cgroup_subsys *ss;
    5695                 :            :         int i;
    5696                 :            : 
    5697                 :          3 :         ctx.root = &cgrp_dfl_root;
    5698                 :          3 :         init_cgroup_root(&ctx);
    5699                 :          3 :         cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
    5700                 :            : 
    5701                 :          3 :         RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
    5702                 :            : 
    5703                 :          3 :         for_each_subsys(ss, i) {
    5704                 :          3 :                 WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
    5705                 :            :                      "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
    5706                 :            :                      i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
    5707                 :            :                      ss->id, ss->name); /* css_alloc/css_free are required; name and id must not be preset, they are assigned below */
    5708                 :          3 :                 WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
    5709                 :            :                      "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
    5710                 :            : 
    5711                 :          3 :                 ss->id = i;     /* id is the index in the for_each_subsys() walk */
    5712                 :          3 :                 ss->name = cgroup_subsys_name[i];
    5713                 :          3 :                 if (!ss->legacy_name)   /* legacy_name defaults to the regular name */
    5714                 :          3 :                         ss->legacy_name = cgroup_subsys_name[i];
    5715                 :            : 
    5716                 :          3 :                 if (ss->early_init)
    5717                 :          3 :                         cgroup_init_subsys(ss, true);
    5718                 :            :         }
    5719                 :          3 :         return 0;       /* always 0: the checks above only WARN */
    5720                 :            : }
    5721                 :            : 
    5722                 :            : static u16 cgroup_disable_mask __initdata;      /* bit ssid set: cgroup_init() disables that subsystem */
    5723                 :            : static u16 cgroup_enable_mask __initdata;       /* bits that cgroup_init() clears from cgroup_disable_mask */
    5724                 :            : static int __init cgroup_disable(char *str);    /* prototype: cgroup_init() below calls it */
    5725                 :            : 
    5726                 :            : /**
    5727                 :            :  * cgroup_init - cgroup initialization
    5728                 :            :  *
    5729                 :            :  * Register cgroup filesystem and /proc file, and initialize
    5730                 :            :  * any subsystems that didn't request early init.
    5731                 :            :  */
    5732                 :          3 : int __init cgroup_init(void)
    5733                 :            : {
    5734                 :            :         struct cgroup_subsys *ss;
    5735                 :            :         int ssid;
    5736                 :            : 
    5737                 :            :         BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); /* NOTE(review): presumably because per-subsystem masks are u16 (cf. cgroup_disable_mask) - confirm */
    5738                 :          3 :         BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
    5739                 :          3 :         BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
    5740                 :            : 
    5741                 :          3 :         cgroup_rstat_boot();
    5742                 :            : 
    5743                 :            :         /*
    5744                 :            :          * The latency of the synchronize_rcu() is too high for cgroups,
    5745                 :            :          * avoid it at the cost of forcing all readers into the slow path.
    5746                 :            :          */
    5747                 :          3 :         rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
    5748                 :            : 
    5749                 :          3 :         get_user_ns(init_cgroup_ns.user_ns);
    5750                 :            : 
    5751                 :          3 :         mutex_lock(&cgroup_mutex);
    5752                 :            : 
    5753                 :            :         /*
    5754                 :            :          * Add init_css_set to the hash table so that dfl_root can link to
    5755                 :            :          * it during init.
    5756                 :            :          */
    5757                 :          3 :         hash_add(css_set_table, &init_css_set.hlist,
    5758                 :            :                  css_set_hash(init_css_set.subsys));
    5759                 :            : 
    5760                 :          3 :         BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
    5761                 :            : 
    5762                 :          3 :         mutex_unlock(&cgroup_mutex);
    5763                 :            : 
    5764                 :            :         /* Apply an implicit disable... */
    5765                 :          3 :         cgroup_disable("memory");
    5766                 :            : 
    5767                 :            :         /* ...knowing that an explicit enable will override it. */
    5768                 :          3 :         cgroup_disable_mask &= ~cgroup_enable_mask;
    5769                 :            : 
    5770                 :          3 :         for_each_subsys(ss, ssid) {
    5771                 :          3 :                 if (ss->early_init) {   /* initialized in cgroup_init_early(); only the idr id remains */
    5772                 :          3 :                         struct cgroup_subsys_state *css =
    5773                 :          3 :                                 init_css_set.subsys[ss->id];
    5774                 :            : 
    5775                 :          3 :                         css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
    5776                 :            :                                                    GFP_KERNEL);
    5777                 :          3 :                         BUG_ON(css->id < 0);
    5778                 :            :                 } else {
    5779                 :          3 :                         cgroup_init_subsys(ss, false);
    5780                 :            :                 }
    5781                 :            : 
    5782                 :          3 :                 list_add_tail(&init_css_set.e_cset_node[ssid],
    5783                 :            :                               &cgrp_dfl_root.cgrp.e_csets[ssid]);
    5784                 :            : 
    5785                 :            :                 /*
    5786                 :            :                  * Setting dfl_root subsys_mask needs to consider the
    5787                 :            :                  * disabled flag and cftype registration needs kmalloc,
    5788                 :            :                  * both of which aren't available during early_init.
    5789                 :            :                  */
    5790                 :          3 :                 if (cgroup_disable_mask & (1 << ssid)) {
    5791                 :          3 :                         static_branch_disable(cgroup_subsys_enabled_key[ssid]);
    5792                 :          3 :                         printk(KERN_INFO "Disabling %s control group subsystem\n",
    5793                 :            :                                ss->name);
    5794                 :          3 :                         continue;       /* disabled: skip mask setup, cftypes, bind and populate */
    5795                 :            :                 }
    5796                 :            : 
    5797                 :          3 :                 if (cgroup1_ssid_disabled(ssid))
    5798                 :          0 :                         printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
    5799                 :            :                                ss->name);
    5800                 :            : 
    5801                 :          3 :                 cgrp_dfl_root.subsys_mask |= 1 << ss->id;
    5802                 :            : 
    5803                 :            :                 /* implicit controllers must be threaded too */
    5804                 :          3 :                 WARN_ON(ss->implicit_on_dfl && !ss->threaded);
    5805                 :            : 
    5806                 :          3 :                 if (ss->implicit_on_dfl)
    5807                 :          3 :                         cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
    5808                 :          3 :                 else if (!ss->dfl_cftypes)
    5809                 :          3 :                         cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
    5810                 :            : 
    5811                 :          3 :                 if (ss->threaded)
    5812                 :          3 :                         cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
    5813                 :            : 
    5814                 :          3 :                 if (ss->dfl_cftypes == ss->legacy_cftypes) {    /* one shared cftype table: add it once */
    5815                 :          3 :                         WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
    5816                 :            :                 } else {
    5817                 :          3 :                         WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
    5818                 :          3 :                         WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
    5819                 :            :                 }
    5820                 :            : 
    5821                 :          3 :                 if (ss->bind)
    5822                 :          3 :                         ss->bind(init_css_set.subsys[ssid]);
    5823                 :            : 
    5824                 :          3 :                 mutex_lock(&cgroup_mutex);
    5825                 :          3 :                 css_populate_dir(init_css_set.subsys[ssid]);
    5826                 :          3 :                 mutex_unlock(&cgroup_mutex);
    5827                 :            :         }
    5828                 :            : 
    5829                 :            :         /* init_css_set.subsys[] has been updated, re-hash */
    5830                 :            :         hash_del(&init_css_set.hlist);
    5831                 :          3 :         hash_add(css_set_table, &init_css_set.hlist,
    5832                 :            :                  css_set_hash(init_css_set.subsys));
    5833                 :            : 
    5834                 :          3 :         WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
    5835                 :          3 :         WARN_ON(register_filesystem(&cgroup_fs_type));
    5836                 :          3 :         WARN_ON(register_filesystem(&cgroup2_fs_type));
    5837                 :          3 :         WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
    5838                 :            : #ifdef CONFIG_CPUSETS
    5839                 :          3 :         WARN_ON(register_filesystem(&cpuset_fs_type));
    5840                 :            : #endif
    5841                 :            : 
    5842                 :          3 :         return 0;       /* always 0: the registration failures above only WARN */
    5843                 :            : }
    5844                 :            : 
    5845                 :          3 : static int __init cgroup_wq_init(void)
    5846                 :            : {
    5847                 :            :         /*
    5848                 :            :          * There isn't much point in executing the destruction path in
    5849                 :            :          * parallel.  A good chunk is serialized with cgroup_mutex anyway.
    5850                 :            :          * Use 1 for @max_active.
    5851                 :            :          *
    5852                 :            :          * We would prefer to do this in cgroup_init() above, but that
    5853                 :            :          * is called before init_workqueues(), so it is deferred to an initcall.
    5854                 :            :          */
    5855                 :          3 :         cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
    5856                 :          3 :         BUG_ON(!cgroup_destroy_wq);     /* allocation failure is treated as fatal */
    5857                 :          3 :         return 0;
    5858                 :            : }
    5859                 :            : core_initcall(cgroup_wq_init);
    5860                 :            : 
    5861                 :          0 : void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
    5862                 :            :                                         char *buf, size_t buflen)      /* write the path of the kernfs node @id into @buf */
    5863                 :            : {
    5864                 :            :         struct kernfs_node *kn;
    5865                 :            : 
    5866                 :          0 :         kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id); /* looked up under the default root's kernfs root only */
    5867                 :          0 :         if (!kn)
    5868                 :          0 :                 return;        /* no such node: @buf is left untouched */
    5869                 :            :         kernfs_path(kn, buf, buflen);
    5870                 :          0 :         kernfs_put(kn);        /* NOTE(review): presumably drops the reference taken by the lookup - confirm */
    5871                 :            : }
    5872                 :            : 
    5873                 :            : /*
    5874                 :            :  * proc_cgroup_show()
    5875                 :            :  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
    5876                 :            :  *  - Used for /proc/<pid>/cgroup; returns 0 on success or a negative errno.
    5877                 :            :  */
    5878                 :          3 : int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
    5879                 :            :                      struct pid *pid, struct task_struct *tsk)
    5880                 :            : {
    5881                 :            :         char *buf;
    5882                 :            :         int retval;
    5883                 :            :         struct cgroup_root *root;
    5884                 :            : 
    5885                 :            :         retval = -ENOMEM;
    5886                 :            :         buf = kmalloc(PATH_MAX, GFP_KERNEL);
    5887                 :          3 :         if (!buf)
    5888                 :            :                 goto out;
    5889                 :            : 
    5890                 :          3 :         mutex_lock(&cgroup_mutex);
    5891                 :            :         spin_lock_irq(&css_set_lock);
    5892                 :            : 
    5893                 :          3 :         for_each_root(root) {
    5894                 :            :                 struct cgroup_subsys *ss;
    5895                 :            :                 struct cgroup *cgrp;
    5896                 :            :                 int ssid, count = 0;
    5897                 :            : 
    5898                 :          3 :                 if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
    5899                 :          0 :                         continue;       /* default hierarchy is not listed while !cgrp_dfl_visible */
    5900                 :            : 
    5901                 :          3 :                 seq_printf(m, "%d:", root->hierarchy_id);
    5902                 :          3 :                 if (root != &cgrp_dfl_root)
    5903                 :          3 :                         for_each_subsys(ss, ssid)
    5904                 :          3 :                                 if (root->subsys_mask & (1 << ssid))
    5905                 :          3 :                                         seq_printf(m, "%s%s", count++ ? "," : "",
    5906                 :            :                                                    ss->legacy_name);
    5907                 :          3 :                 if (strlen(root->name))
    5908                 :          3 :                         seq_printf(m, "%sname=%s", count ? "," : "",
    5909                 :          3 :                                    root->name);
    5910                 :          3 :                 seq_putc(m, ':');
    5911                 :            : 
    5912                 :            :                 cgrp = task_cgroup_from_root(tsk, root);
    5913                 :            : 
    5914                 :            :                 /*
    5915                 :            :                  * On traditional hierarchies, all zombie tasks show up as
    5916                 :            :                  * belonging to the root cgroup.  On the default hierarchy,
    5917                 :            :                  * while a zombie doesn't show up in "cgroup.procs" and
    5918                 :            :                  * thus can't be migrated, its /proc/PID/cgroup keeps
    5919                 :            :                  * reporting the cgroup it belonged to before exiting.  If
    5920                 :            :                  * the cgroup is removed before the zombie is reaped,
    5921                 :            :                  * " (deleted)" is appended to the cgroup path.
    5922                 :            :                  */
    5923                 :          3 :                 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
    5924                 :          3 :                         retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
    5925                 :          3 :                                                 current->nsproxy->cgroup_ns);  /* the reading task's namespace, not @tsk's */
    5926                 :          3 :                         if (retval >= PATH_MAX) /* NOTE(review): presumably means the path did not fit in buf - confirm */
    5927                 :            :                                 retval = -ENAMETOOLONG;
    5928                 :          3 :                         if (retval < 0)
    5929                 :            :                                 goto out_unlock;
    5930                 :            : 
    5931                 :          3 :                         seq_puts(m, buf);
    5932                 :            :                 } else {
    5933                 :          3 :                         seq_puts(m, "/");
    5934                 :            :                 }
    5935                 :            : 
    5936                 :          3 :                 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
    5937                 :          3 :                         seq_puts(m, " (deleted)\n");
    5938                 :            :                 else
    5939                 :          3 :                         seq_putc(m, '\n');
    5940                 :            :         }
    5941                 :            : 
    5942                 :            :         retval = 0;
    5943                 :            : out_unlock:
    5944                 :            :         spin_unlock_irq(&css_set_lock);
    5945                 :          3 :         mutex_unlock(&cgroup_mutex);
    5946                 :          3 :         kfree(buf);
    5947                 :            : out:
    5948                 :          3 :         return retval;
    5949                 :            : }
    5950                 :            : 
    5951                 :            : /**
    5952                 :            :  * cgroup_fork - initialize cgroup related fields during copy_process()
    5953                 :            :  * @child: pointer to task_struct of the new task being set up by fork.
    5954                 :            :  *
    5955                 :            :  * A task is associated with the init_css_set until cgroup_post_fork()
    5956                 :            :  * attaches it to the parent's css_set.  Empty cg_list indicates that
    5957                 :            :  * @child isn't holding reference to its css_set.
    5958                 :            :  */
    5959                 :          3 : void cgroup_fork(struct task_struct *child)
    5960                 :            : {
    5961                 :          3 :         RCU_INIT_POINTER(child->cgroups, &init_css_set);
    5962                 :          3 :         INIT_LIST_HEAD(&child->cg_list);
    5963                 :          3 : }
    5964                 :            : 
    5965                 :            : /**
    5966                 :            :  * cgroup_can_fork - called on a new task before the process is exposed
    5967                 :            :  * @child: the task in question.
    5968                 :            :  *
    5969                 :            :  * This calls the subsystem can_fork() callbacks. If the can_fork() callback
    5970                 :            :  * returns an error, the fork aborts with that error code. This allows for
    5971                 :            :  * a cgroup subsystem to conditionally allow or deny new forks.
    5972                 :            :  */
    5973                 :          3 : int cgroup_can_fork(struct task_struct *child)
    5974                 :            : {
    5975                 :            :         struct cgroup_subsys *ss;
    5976                 :            :         int i, j, ret;
    5977                 :            : 
    5978                 :          3 :         do_each_subsys_mask(ss, i, have_canfork_callback) {
    5979                 :          3 :                 ret = ss->can_fork(child);
    5980                 :          3 :                 if (ret)
    5981                 :            :                         goto out_revert;
    5982                 :            :         } while_each_subsys_mask();
    5983                 :            : 
    5984                 :          3 :         return 0;       /* every can_fork() callback accepted @child */
    5985                 :            : 
    5986                 :            : out_revert:
    5987                 :          0 :         for_each_subsys(ss, j) {
    5988                 :          0 :                 if (j >= i)     /* stop at the subsystem whose can_fork() failed (index i) */
    5989                 :            :                         break;
    5990                 :          0 :                 if (ss->cancel_fork)
    5991                 :          0 :                         ss->cancel_fork(child);
    5992                 :            :         }
    5993                 :            : 
    5994                 :          0 :         return ret;     /* error code from the failing can_fork() */
    5995                 :            : }
    5996                 :            : 
    5997                 :            : /**
    5998                 :            :  * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
    5999                 :            :  * @child: the task in question
    6000                 :            :  *
    6001                 :            :  * This calls the cancel_fork() callbacks if a fork failed *after*
    6002                 :            :  * cgroup_can_fork() succeeded.
    6003                 :            :  */
    6004                 :          0 : void cgroup_cancel_fork(struct task_struct *child)
    6005                 :            : {
    6006                 :            :         struct cgroup_subsys *ss;
    6007                 :            :         int i;
    6008                 :            : 
    6009                 :          0 :         for_each_subsys(ss, i)
    6010                 :          0 :                 if (ss->cancel_fork)
    6011                 :          0 :                         ss->cancel_fork(child);
    6012                 :          0 : }
    6013                 :            : 
    6014                 :            : /**
    6015                 :            :  * cgroup_post_fork - called on a new task after adding it to the task list
    6016                 :            :  * @child: the task in question
    6017                 :            :  *
    6018                 :            :  * Adds the task to the list running through its css_set if necessary and
    6019                 :            :  * calls the subsystem fork() callbacks.  Has to be after the task is
    6020                 :            :  * visible on the task list in case we race with the first call to
    6021                 :            :  * cgroup_task_iter_start() - to guarantee that the new task ends up on its
    6022                 :            :  * list.
    6023                 :            :  */
    6024                 :          3 : void cgroup_post_fork(struct task_struct *child)
    6025                 :            : {
    6026                 :            :         struct cgroup_subsys *ss;
    6027                 :            :         int i;
    6028                 :            : 
    6029                 :            :         /*
    6030                 :            :          * This may race against cgroup_enable_task_cg_lists().  As that
    6031                 :            :          * function sets use_task_css_set_links before grabbing
    6032                 :            :          * tasklist_lock and we just went through tasklist_lock to add
    6033                 :            :          * @child, it's guaranteed that either we see the set
    6034                 :            :          * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
    6035                 :            :          * @child during its iteration.
    6036                 :            :          *
    6037                 :            :          * If we won the race, @child is associated with %current's
    6038                 :            :          * css_set.  Grabbing css_set_lock guarantees both that the
    6039                 :            :          * association is stable, and, on completion of the parent's
    6040                 :            :          * migration, @child is visible in the source of migration or
    6041                 :            :          * already in the destination cgroup.  This guarantee is necessary
    6042                 :            :          * when implementing operations which need to migrate all tasks of
    6043                 :            :          * a cgroup to another.
    6044                 :            :          *
    6045                 :            :          * Note that if we lose to cgroup_enable_task_cg_lists(), @child
    6046                 :            :          * will remain in init_css_set.  This is safe because all tasks are
    6047                 :            :          * in the init_css_set before cg_links is enabled and there's no
    6048                 :            :          * operation which transfers all tasks out of init_css_set.
    6049                 :            :          */
    6050                 :          3 :         if (use_task_css_set_links) {
    6051                 :            :                 struct css_set *cset;
    6052                 :            : 
    6053                 :            :                 spin_lock_irq(&css_set_lock);
    6054                 :          3 :                 cset = task_css_set(current);   /* the css_set of %current, i.e. the forking task */
    6055                 :          3 :                 if (list_empty(&child->cg_list)) {      /* @child is not yet linked to any css_set */
    6056                 :            :                         get_css_set(cset);
    6057                 :          3 :                         cset->nr_tasks++;
    6058                 :          3 :                         css_set_move_task(child, NULL, cset, false);
    6059                 :            :                 }
    6060                 :            : 
    6061                 :            :                 /*
    6062                 :            :                  * If the cgroup has to be frozen, the new task has too.
    6063                 :            :                  * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
    6064                 :            :                  * the task into the frozen state.
    6065                 :            :                  */
    6066                 :          3 :                 if (unlikely(cgroup_task_freeze(child))) {
    6067                 :          0 :                         spin_lock(&child->sighand->siglock);
    6068                 :          0 :                         WARN_ON_ONCE(child->frozen);
    6069                 :          0 :                         child->jobctl |= JOBCTL_TRAP_FREEZE;
    6070                 :          0 :                         spin_unlock(&child->sighand->siglock);
    6071                 :            : 
    6072                 :            :                         /*
    6073                 :            :                          * Calling cgroup_update_frozen() isn't required here,
    6074                 :            :                          * because it will be called anyway a bit later
    6075                 :            :                          * from do_freezer_trap(). So we avoid cgroup's
    6076                 :            :                          * transient switch from the frozen state and back.
    6077                 :            :                          */
    6078                 :            :                 }
    6079                 :            : 
    6080                 :            :                 spin_unlock_irq(&css_set_lock);
    6081                 :            :         }
    6082                 :            : 
    6083                 :            :         /*
    6084                 :            :          * Call ss->fork().  This must happen after @child is linked on
    6085                 :            :          * css_set; otherwise, @child might change state between ->fork()
    6086                 :            :          * and addition to css_set.
    6087                 :            :          */
    6088                 :          3 :         do_each_subsys_mask(ss, i, have_fork_callback) {
    6089                 :          3 :                 ss->fork(child);
    6090                 :            :         } while_each_subsys_mask();
    6091                 :          3 : }
    6092                 :            : 
    6093                 :            : /**
    6094                 :            :  * cgroup_exit - detach cgroup from exiting task
    6095                 :            :  * @tsk: pointer to task_struct of exiting process
    6096                 :            :  *
    6097                 :            :  * Description: Detach cgroup from @tsk and release it.
    6098                 :            :  *
    6099                 :            :  * Note that cgroups marked notify_on_release force every task in
    6100                 :            :  * them to take the global cgroup_mutex mutex when exiting.
    6101                 :            :  * This could impact scaling on very large systems.  Be reluctant to
    6102                 :            :  * use notify_on_release cgroups where very high task exit scaling
    6103                 :            :  * is required on large systems.
    6104                 :            :  *
    6105                 :            :  * We set the exiting task's cgroup to the root cgroup (top_cgroup).  We
    6106                 :            :  * call cgroup_exit() while the task is still competent to handle
    6107                 :            :  * notify_on_release(), then leave the task attached to the root cgroup in
    6108                 :            :  * each hierarchy for the remainder of its exit.  No need to bother with
    6109                 :            :  * init_css_set refcnting.  init_css_set never goes away and we can't race
    6110                 :            :  * with migration path - PF_EXITING is visible to migration path.
    6111                 :            :  */
    6112                 :          3 : void cgroup_exit(struct task_struct *tsk)
    6113                 :            : {
    6114                 :            :         struct cgroup_subsys *ss;
    6115                 :            :         struct css_set *cset;
    6116                 :            :         int i;
    6117                 :            : 
    6118                 :            :         /*
    6119                 :            :          * Unlink @tsk from its css_set.  As migration path can't race
    6120                 :            :          * with us, we can check css_set and cg_list without synchronization.
    6121                 :            :          */
    6122                 :            :         cset = task_css_set(tsk);
    6123                 :            : 
    6124                 :          3 :         if (!list_empty(&tsk->cg_list)) {
    6125                 :            :                 spin_lock_irq(&css_set_lock);
    6126                 :          3 :                 css_set_move_task(tsk, cset, NULL, false);
    6127                 :          3 :                 list_add_tail(&tsk->cg_list, &cset->dying_tasks);
    6128                 :          3 :                 cset->nr_tasks--;
    6129                 :            : 
    6130                 :          3 :                 WARN_ON_ONCE(cgroup_task_frozen(tsk));
    6131                 :          3 :                 if (unlikely(cgroup_task_freeze(tsk)))
    6132                 :          0 :                         cgroup_update_frozen(task_dfl_cgroup(tsk));
    6133                 :            : 
    6134                 :            :                 spin_unlock_irq(&css_set_lock);
    6135                 :            :         } else {
    6136                 :            :                 get_css_set(cset);
    6137                 :            :         }
    6138                 :            : 
    6139                 :            :         /* see cgroup_post_fork() for details */
    6140                 :          3 :         do_each_subsys_mask(ss, i, have_exit_callback) {
    6141                 :          3 :                 ss->exit(tsk);
    6142                 :            :         } while_each_subsys_mask();
    6143                 :          3 : }
    6144                 :            : 
    6145                 :          3 : void cgroup_release(struct task_struct *task)
    6146                 :            : {
    6147                 :            :         struct cgroup_subsys *ss;
    6148                 :            :         int ssid;
    6149                 :            : 
    6150                 :          3 :         do_each_subsys_mask(ss, ssid, have_release_callback) {
    6151                 :          3 :                 ss->release(task);
    6152                 :            :         } while_each_subsys_mask();
    6153                 :            : 
    6154                 :          3 :         if (use_task_css_set_links) {
    6155                 :            :                 spin_lock_irq(&css_set_lock);
    6156                 :          3 :                 css_set_skip_task_iters(task_css_set(task), task);
    6157                 :          3 :                 list_del_init(&task->cg_list);
    6158                 :            :                 spin_unlock_irq(&css_set_lock);
    6159                 :            :         }
    6160                 :          3 : }
    6161                 :            : 
    6162                 :          3 : void cgroup_free(struct task_struct *task)
    6163                 :            : {
    6164                 :            :         struct css_set *cset = task_css_set(task);
    6165                 :          3 :         put_css_set(cset);
    6166                 :          3 : }
    6167                 :            : 
    6168                 :          3 : static int __init cgroup_disable(char *str)
    6169                 :            : {
    6170                 :            :         struct cgroup_subsys *ss;
    6171                 :            :         char *token;
    6172                 :            :         int i;
    6173                 :            : 
    6174                 :          3 :         while ((token = strsep(&str, ",")) != NULL) {
    6175                 :          3 :                 if (!*token)
    6176                 :          0 :                         continue;
    6177                 :            : 
    6178                 :          3 :                 for_each_subsys(ss, i) {
    6179                 :          3 :                         if (strcmp(token, ss->name) &&
    6180                 :          3 :                             strcmp(token, ss->legacy_name))
    6181                 :          3 :                                 continue;
    6182                 :          3 :                         cgroup_disable_mask |= 1 << i;
    6183                 :            :                 }
    6184                 :            :         }
    6185                 :          3 :         return 1;
    6186                 :            : }
    6187                 :            : __setup("cgroup_disable=", cgroup_disable);
    6188                 :            : 
    6189                 :          0 : static int __init cgroup_enable(char *str)
    6190                 :            : {
    6191                 :            :         struct cgroup_subsys *ss;
    6192                 :            :         char *token;
    6193                 :            :         int i;
    6194                 :            : 
    6195                 :          0 :         while ((token = strsep(&str, ",")) != NULL) {
    6196                 :          0 :                 if (!*token)
    6197                 :          0 :                         continue;
    6198                 :            : 
    6199                 :          0 :                 for_each_subsys(ss, i) {
    6200                 :          0 :                         if (strcmp(token, ss->name) &&
    6201                 :          0 :                             strcmp(token, ss->legacy_name))
    6202                 :          0 :                                 continue;
    6203                 :            : 
    6204                 :          0 :                         cgroup_enable_mask |= 1 << i;
    6205                 :            :                 }
    6206                 :            :         }
    6207                 :          0 :         return 1;
    6208                 :            : }
    6209                 :            : __setup("cgroup_enable=", cgroup_enable);
    6210                 :            : 
    6211                 :          0 : void __init __weak enable_debug_cgroup(void) { }
    6212                 :            : 
    6213                 :          0 : static int __init enable_cgroup_debug(char *str)
    6214                 :            : {
    6215                 :          0 :         cgroup_debug = true;
    6216                 :          0 :         enable_debug_cgroup();
    6217                 :          0 :         return 1;
    6218                 :            : }
    6219                 :            : __setup("cgroup_debug", enable_cgroup_debug);
    6220                 :            : 
    6221                 :            : /**
    6222                 :            :  * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
    6223                 :            :  * @dentry: directory dentry of interest
    6224                 :            :  * @ss: subsystem of interest
    6225                 :            :  *
    6226                 :            :  * If @dentry is a directory for a cgroup which has @ss enabled on it, try
    6227                 :            :  * to get the corresponding css and return it.  If such css doesn't exist
    6228                 :            :  * or can't be pinned, an ERR_PTR value is returned.
    6229                 :            :  */
    6230                 :          3 : struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
    6231                 :            :                                                        struct cgroup_subsys *ss)
    6232                 :            : {
    6233                 :          3 :         struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
    6234                 :          3 :         struct file_system_type *s_type = dentry->d_sb->s_type;
    6235                 :            :         struct cgroup_subsys_state *css = NULL;
    6236                 :            :         struct cgroup *cgrp;
    6237                 :            : 
    6238                 :            :         /* is @dentry a cgroup dir? */
    6239                 :          3 :         if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
    6240                 :          3 :             !kn || kernfs_type(kn) != KERNFS_DIR)
    6241                 :            :                 return ERR_PTR(-EBADF);
    6242                 :            : 
    6243                 :            :         rcu_read_lock();
    6244                 :            : 
    6245                 :            :         /*
    6246                 :            :          * This path doesn't originate from kernfs and @kn could already
    6247                 :            :          * have been or be removed at any point.  @kn->priv is RCU
    6248                 :            :          * protected for this access.  See css_release_work_fn() for details.
    6249                 :            :          */
    6250                 :            :         cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
    6251                 :          3 :         if (cgrp)
    6252                 :            :                 css = cgroup_css(cgrp, ss);
    6253                 :            : 
    6254                 :          3 :         if (!css || !css_tryget_online(css))
    6255                 :            :                 css = ERR_PTR(-ENOENT);
    6256                 :            : 
    6257                 :            :         rcu_read_unlock();
    6258                 :          3 :         return css;
    6259                 :            : }
    6260                 :            : 
    6261                 :            : /**
    6262                 :            :  * css_from_id - lookup css by id
    6263                 :            :  * @id: the cgroup id
    6264                 :            :  * @ss: cgroup subsys to be looked into
    6265                 :            :  *
    6266                 :            :  * Returns the css if there's valid one with @id, otherwise returns NULL.
    6267                 :            :  * Should be called under rcu_read_lock().
    6268                 :            :  */
    6269                 :          0 : struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
    6270                 :            : {
    6271                 :            :         WARN_ON_ONCE(!rcu_read_lock_held());
    6272                 :          0 :         return idr_find(&ss->css_idr, id);
    6273                 :            : }
    6274                 :            : 
    6275                 :            : /**
    6276                 :            :  * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
    6277                 :            :  * @path: path on the default hierarchy
    6278                 :            :  *
    6279                 :            :  * Find the cgroup at @path on the default hierarchy, increment its
    6280                 :            :  * reference count and return it.  Returns pointer to the found cgroup on
    6281                 :            :  * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
    6282                 :            :  * if @path points to a non-directory.
    6283                 :            :  */
    6284                 :          0 : struct cgroup *cgroup_get_from_path(const char *path)
    6285                 :            : {
    6286                 :            :         struct kernfs_node *kn;
    6287                 :            :         struct cgroup *cgrp;
    6288                 :            : 
    6289                 :          0 :         mutex_lock(&cgroup_mutex);
    6290                 :            : 
    6291                 :          0 :         kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
    6292                 :          0 :         if (kn) {
    6293                 :          0 :                 if (kernfs_type(kn) == KERNFS_DIR) {
    6294                 :          0 :                         cgrp = kn->priv;
    6295                 :          0 :                         cgroup_get_live(cgrp);
    6296                 :            :                 } else {
    6297                 :            :                         cgrp = ERR_PTR(-ENOTDIR);
    6298                 :            :                 }
    6299                 :          0 :                 kernfs_put(kn);
    6300                 :            :         } else {
    6301                 :            :                 cgrp = ERR_PTR(-ENOENT);
    6302                 :            :         }
    6303                 :            : 
    6304                 :          0 :         mutex_unlock(&cgroup_mutex);
    6305                 :          0 :         return cgrp;
    6306                 :            : }
    6307                 :            : EXPORT_SYMBOL_GPL(cgroup_get_from_path);
    6308                 :            : 
    6309                 :            : /**
    6310                 :            :  * cgroup_get_from_fd - get a cgroup pointer from a fd
    6311                 :            :  * @fd: fd obtained by open(cgroup2_dir)
    6312                 :            :  *
    6313                 :            :  * Find the cgroup from a fd which should be obtained
    6314                 :            :  * by opening a cgroup directory.  Returns a pointer to the
    6315                 :            :  * cgroup on success. ERR_PTR is returned if the cgroup
    6316                 :            :  * cannot be found.
    6317                 :            :  */
    6318                 :          3 : struct cgroup *cgroup_get_from_fd(int fd)
    6319                 :            : {
    6320                 :            :         struct cgroup_subsys_state *css;
    6321                 :            :         struct cgroup *cgrp;
    6322                 :            :         struct file *f;
    6323                 :            : 
    6324                 :          3 :         f = fget_raw(fd);
    6325                 :          3 :         if (!f)
    6326                 :            :                 return ERR_PTR(-EBADF);
    6327                 :            : 
    6328                 :          3 :         css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
    6329                 :          3 :         fput(f);
    6330                 :          3 :         if (IS_ERR(css))
    6331                 :            :                 return ERR_CAST(css);
    6332                 :            : 
    6333                 :          3 :         cgrp = css->cgroup;
    6334                 :          3 :         if (!cgroup_on_dfl(cgrp)) {
    6335                 :            :                 cgroup_put(cgrp);
    6336                 :            :                 return ERR_PTR(-EBADF);
    6337                 :            :         }
    6338                 :            : 
    6339                 :            :         return cgrp;
    6340                 :            : }
    6341                 :            : EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
    6342                 :            : 
    6343                 :            : static u64 power_of_ten(int power)
    6344                 :            : {
    6345                 :            :         u64 v = 1;
    6346                 :          0 :         while (power--)
    6347                 :          0 :                 v *= 10;
    6348                 :          0 :         return v;
    6349                 :            : }
    6350                 :            : 
    6351                 :            : /**
    6352                 :            :  * cgroup_parse_float - parse a floating point number
    6353                 :            :  * @input: input string
    6354                 :            :  * @dec_shift: number of decimal digits to shift
    6355                 :            :  * @v: output
    6356                 :            :  *
    6357                 :            :  * Parse a decimal floating point number in @input and store the result in
    6358                 :            :  * @v with decimal point right shifted @dec_shift times.  For example, if
    6359                 :            :  * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345.
    6360                 :            :  * Returns 0 on success, -errno otherwise.
    6361                 :            :  *
    6362                 :            :  * There's nothing cgroup specific about this function except that it's
    6363                 :            :  * currently the only user.
    6364                 :            :  */
    6365                 :          0 : int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
    6366                 :            : {
    6367                 :          0 :         s64 whole, frac = 0;
    6368                 :          0 :         int fstart = 0, fend = 0, flen;
    6369                 :            : 
    6370                 :          0 :         if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
    6371                 :            :                 return -EINVAL;
    6372                 :          0 :         if (frac < 0)
    6373                 :            :                 return -EINVAL;
    6374                 :            : 
    6375                 :          0 :         flen = fend > fstart ? fend - fstart : 0;
    6376                 :          0 :         if (flen < dec_shift)
    6377                 :          0 :                 frac *= power_of_ten(dec_shift - flen);
    6378                 :            :         else
    6379                 :          0 :                 frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
    6380                 :            : 
    6381                 :          0 :         *v = whole * power_of_ten(dec_shift) + frac;
    6382                 :          0 :         return 0;
    6383                 :            : }
    6384                 :            : 
    6385                 :            : /*
    6386                 :            :  * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
    6387                 :            :  * definition in cgroup-defs.h.
    6388                 :            :  */
    6389                 :            : #ifdef CONFIG_SOCK_CGROUP_DATA
    6390                 :            : 
    6391                 :            : #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
    6392                 :            : 
    6393                 :            : DEFINE_SPINLOCK(cgroup_sk_update_lock);
    6394                 :            : static bool cgroup_sk_alloc_disabled __read_mostly;
    6395                 :            : 
    6396                 :          0 : void cgroup_sk_alloc_disable(void)
    6397                 :            : {
    6398                 :          0 :         if (cgroup_sk_alloc_disabled)
    6399                 :          0 :                 return;
    6400                 :          0 :         pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
    6401                 :          0 :         cgroup_sk_alloc_disabled = true;
    6402                 :            : }
    6403                 :            : 
    6404                 :            : #else
    6405                 :            : 
    6406                 :            : #define cgroup_sk_alloc_disabled        false
    6407                 :            : 
    6408                 :            : #endif
    6409                 :            : 
    6410                 :          3 : void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
    6411                 :            : {
    6412                 :          3 :         if (cgroup_sk_alloc_disabled) {
    6413                 :          0 :                 skcd->no_refcnt = 1;
    6414                 :          0 :                 return;
    6415                 :            :         }
    6416                 :            : 
    6417                 :            :         /* Don't associate the sock with unrelated interrupted task's cgroup. */
    6418                 :          3 :         if (in_interrupt())
    6419                 :            :                 return;
    6420                 :            : 
    6421                 :            :         rcu_read_lock();
    6422                 :            : 
    6423                 :            :         while (true) {
    6424                 :            :                 struct css_set *cset;
    6425                 :            : 
    6426                 :          3 :                 cset = task_css_set(current);
    6427                 :          3 :                 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
    6428                 :          3 :                         skcd->val = (unsigned long)cset->dfl_cgrp;
    6429                 :          3 :                         cgroup_bpf_get(cset->dfl_cgrp);
    6430                 :            :                         break;
    6431                 :            :                 }
    6432                 :          0 :                 cpu_relax();
    6433                 :          0 :         }
    6434                 :            : 
    6435                 :            :         rcu_read_unlock();
    6436                 :            : }
    6437                 :            : 
    6438                 :          0 : void cgroup_sk_clone(struct sock_cgroup_data *skcd)
    6439                 :            : {
    6440                 :          0 :         if (skcd->val) {
    6441                 :          0 :                 if (skcd->no_refcnt)
    6442                 :          0 :                         return;
    6443                 :            :                 /*
    6444                 :            :                  * We might be cloning a socket which is left in an empty
    6445                 :            :                  * cgroup and the cgroup might have already been rmdir'd.
    6446                 :            :                  * Don't use cgroup_get_live().
    6447                 :            :                  */
    6448                 :            :                 cgroup_get(sock_cgroup_ptr(skcd));
    6449                 :            :                 cgroup_bpf_get(sock_cgroup_ptr(skcd));
    6450                 :            :         }
    6451                 :            : }
    6452                 :            : 
    6453                 :          3 : void cgroup_sk_free(struct sock_cgroup_data *skcd)
    6454                 :            : {
    6455                 :            :         struct cgroup *cgrp = sock_cgroup_ptr(skcd);
    6456                 :            : 
    6457                 :          3 :         if (skcd->no_refcnt)
    6458                 :          3 :                 return;
    6459                 :            :         cgroup_bpf_put(cgrp);
    6460                 :            :         cgroup_put(cgrp);
    6461                 :            : }
    6462                 :            : 
    6463                 :            : #endif  /* CONFIG_SOCK_CGROUP_DATA */
    6464                 :            : 
    6465                 :            : #ifdef CONFIG_CGROUP_BPF
    6466                 :          3 : int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
    6467                 :            :                       enum bpf_attach_type type, u32 flags)
    6468                 :            : {
    6469                 :            :         int ret;
    6470                 :            : 
    6471                 :          3 :         mutex_lock(&cgroup_mutex);
    6472                 :          3 :         ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
    6473                 :          3 :         mutex_unlock(&cgroup_mutex);
    6474                 :          3 :         return ret;
    6475                 :            : }
    6476                 :          0 : int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
    6477                 :            :                       enum bpf_attach_type type, u32 flags)
    6478                 :            : {
    6479                 :            :         int ret;
    6480                 :            : 
    6481                 :          0 :         mutex_lock(&cgroup_mutex);
    6482                 :          0 :         ret = __cgroup_bpf_detach(cgrp, prog, type);
    6483                 :          0 :         mutex_unlock(&cgroup_mutex);
    6484                 :          0 :         return ret;
    6485                 :            : }
    6486                 :          0 : int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
    6487                 :            :                      union bpf_attr __user *uattr)
    6488                 :            : {
    6489                 :            :         int ret;
    6490                 :            : 
    6491                 :          0 :         mutex_lock(&cgroup_mutex);
    6492                 :          0 :         ret = __cgroup_bpf_query(cgrp, attr, uattr);
    6493                 :          0 :         mutex_unlock(&cgroup_mutex);
    6494                 :          0 :         return ret;
    6495                 :            : }
    6496                 :            : #endif /* CONFIG_CGROUP_BPF */
    6497                 :            : 
    6498                 :            : #ifdef CONFIG_SYSFS
    6499                 :          0 : static ssize_t show_delegatable_files(struct cftype *files, char *buf,
    6500                 :            :                                       ssize_t size, const char *prefix)
    6501                 :            : {
    6502                 :            :         struct cftype *cft;
    6503                 :            :         ssize_t ret = 0;
    6504                 :            : 
    6505                 :          0 :         for (cft = files; cft && cft->name[0] != '\0'; cft++) {
    6506                 :          0 :                 if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
    6507                 :          0 :                         continue;
    6508                 :            : 
    6509                 :          0 :                 if (prefix)
    6510                 :          0 :                         ret += snprintf(buf + ret, size - ret, "%s.", prefix);
    6511                 :            : 
    6512                 :          0 :                 ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
    6513                 :            : 
    6514                 :          0 :                 if (WARN_ON(ret >= size))
    6515                 :            :                         break;
    6516                 :            :         }
    6517                 :            : 
    6518                 :          0 :         return ret;
    6519                 :            : }
    6520                 :            : 
    6521                 :          0 : static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
    6522                 :            :                               char *buf)
    6523                 :            : {
    6524                 :            :         struct cgroup_subsys *ss;
    6525                 :            :         int ssid;
    6526                 :            :         ssize_t ret = 0;
    6527                 :            : 
    6528                 :          0 :         ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
    6529                 :            :                                      NULL);
    6530                 :            : 
    6531                 :          0 :         for_each_subsys(ss, ssid)
    6532                 :          0 :                 ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
    6533                 :          0 :                                               PAGE_SIZE - ret,
    6534                 :            :                                               cgroup_subsys_name[ssid]);
    6535                 :            : 
    6536                 :          0 :         return ret;
    6537                 :            : }
    6538                 :            : static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
    6539                 :            : 
    6540                 :          0 : static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
    6541                 :            :                              char *buf)
    6542                 :            : {
    6543                 :          0 :         return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
    6544                 :            : }
    6545                 :            : static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
    6546                 :            : 
    6547                 :            : static struct attribute *cgroup_sysfs_attrs[] = {
    6548                 :            :         &cgroup_delegate_attr.attr,
    6549                 :            :         &cgroup_features_attr.attr,
    6550                 :            :         NULL,
    6551                 :            : };
    6552                 :            : 
    6553                 :            : static const struct attribute_group cgroup_sysfs_attr_group = {
    6554                 :            :         .attrs = cgroup_sysfs_attrs,
    6555                 :            :         .name = "cgroup",
    6556                 :            : };
    6557                 :            : 
    6558                 :          3 : static int __init cgroup_sysfs_init(void)
    6559                 :            : {
    6560                 :          3 :         return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
    6561                 :            : }
    6562                 :            : subsys_initcall(cgroup_sysfs_init);
    6563                 :            : 
    6564                 :            : #endif /* CONFIG_SYSFS */

Generated by: LCOV version 1.14