LCOV - Real - mm/slub.c

LCOV - code coverage report

Current view:	top level - mm - slub.c (source / functions)		Hit	Total	Coverage
Test:	Real	Lines:	622	1573	39.5 %
Date:	2020-10-17 15:46:16	Functions:	0	172	0.0 %
Legend:	Neither, QEMU, Real, Both	Branches:	0	0	-

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: GPL-2.0
       2                 :            : /*
       3                 :            :  * SLUB: A slab allocator that limits cache line use instead of queuing
       4                 :            :  * objects in per cpu and per node lists.
       5                 :            :  *
       6                 :            :  * The allocator synchronizes using per slab locks or atomic operations
       7                 :            :  * and only uses a centralized lock to manage a pool of partial slabs.
       8                 :            :  *
       9                 :            :  * (C) 2007 SGI, Christoph Lameter
      10                 :            :  * (C) 2011 Linux Foundation, Christoph Lameter
      11                 :            :  */
      12                 :            : 
      13                 :            : #include <linux/mm.h>
      14                 :            : #include <linux/swap.h> /* struct reclaim_state */
      15                 :            : #include <linux/module.h>
      16                 :            : #include <linux/bit_spinlock.h>
      17                 :            : #include <linux/interrupt.h>
      18                 :            : #include <linux/bitops.h>
      19                 :            : #include <linux/slab.h>
      20                 :            : #include "slab.h"
      21                 :            : #include <linux/proc_fs.h>
      22                 :            : #include <linux/seq_file.h>
      23                 :            : #include <linux/kasan.h>
      24                 :            : #include <linux/cpu.h>
      25                 :            : #include <linux/cpuset.h>
      26                 :            : #include <linux/mempolicy.h>
      27                 :            : #include <linux/ctype.h>
      28                 :            : #include <linux/debugobjects.h>
      29                 :            : #include <linux/kallsyms.h>
      30                 :            : #include <linux/memory.h>
      31                 :            : #include <linux/math64.h>
      32                 :            : #include <linux/fault-inject.h>
      33                 :            : #include <linux/stacktrace.h>
      34                 :            : #include <linux/prefetch.h>
      35                 :            : #include <linux/memcontrol.h>
      36                 :            : #include <linux/random.h>
      37                 :            : 
      38                 :            : #include <trace/events/kmem.h>
      39                 :            : 
      40                 :            : #include "internal.h"
      41                 :            : 
      42                 :            : /*
      43                 :            :  * Lock order:
      44                 :            :  *   1. slab_mutex (Global Mutex)
      45                 :            :  *   2. node->list_lock
      46                 :            :  *   3. slab_lock(page) (Only on some arches and for debugging)
      47                 :            :  *
      48                 :            :  *   slab_mutex
      49                 :            :  *
      50                 :            :  *   The role of the slab_mutex is to protect the list of all the slabs
      51                 :            :  *   and to synchronize major metadata changes to slab cache structures.
      52                 :            :  *
      53                 :            :  *   The slab_lock is only used for debugging and on arches that do not
      54                 :            :  *   have the ability to do a cmpxchg_double. It only protects:
      55                 :            :  *      A. page->freelist    -> List of free objects in a page
      56                 :            :  *      B. page->inuse               -> Number of objects in use
      57                 :            :  *      C. page->objects     -> Number of objects in page
      58                 :            :  *      D. page->frozen              -> frozen state
      59                 :            :  *
      60                 :            :  *   If a slab is frozen then it is exempt from list management. It is not
      61                 :            :  *   on any list except per cpu partial list. The processor that froze the
      62                 :            :  *   slab is the one who can perform list operations on the page. Other
      63                 :            :  *   processors may put objects onto the freelist but the processor that
      64                 :            :  *   froze the slab is the only one that can retrieve the objects from the
      65                 :            :  *   page's freelist.
      66                 :            :  *
      67                 :            :  *   The list_lock protects the partial and full list on each node and
      68                 :            :  *   the partial slab counter. If taken then no new slabs may be added or
      69                 :            :  *   removed from the lists, nor may the number of partial slabs be modified.
      70                 :            :  *   (Note that the total number of slabs is an atomic value that may be
      71                 :            :  *   modified without taking the list lock).
      72                 :            :  *
      73                 :            :  *   The list_lock is a centralized lock and thus we avoid taking it as
      74                 :            :  *   much as possible. As long as SLUB does not have to handle partial
      75                 :            :  *   slabs, operations can continue without any centralized lock. F.e.
      76                 :            :  *   allocating a long series of objects that fill up slabs does not require
      77                 :            :  *   the list lock.
      78                 :            :  *   Interrupts are disabled during allocation and deallocation in order to
      79                 :            :  *   make the slab allocator safe to use in the context of an irq. In addition
      80                 :            :  *   interrupts are disabled to ensure that the processor does not change
      81                 :            :  *   while handling per_cpu slabs, due to kernel preemption.
      82                 :            :  *
      83                 :            :  * SLUB assigns one slab for allocation to each processor.
      84                 :            :  * Allocations only occur from these slabs called cpu slabs.
      85                 :            :  *
      86                 :            :  * Slabs with free elements are kept on a partial list and during regular
      87                 :            :  * operations no list for full slabs is used. If an object in a full slab is
      88                 :            :  * freed then the slab will show up again on the partial lists.
      89                 :            :  * We track full slabs for debugging purposes though because otherwise we
      90                 :            :  * cannot scan all objects.
      91                 :            :  *
      92                 :            :  * Slabs are freed when they become empty. Teardown and setup is
      93                 :            :  * minimal so we rely on the page allocators per cpu caches for
      94                 :            :  * fast frees and allocs.
      95                 :            :  *
      96                 :            :  * Overloading of page flags that are otherwise used for LRU management.
      97                 :            :  *
      98                 :            :  * PageActive           The slab is frozen and exempt from list processing.
      99                 :            :  *                      This means that the slab is dedicated to a purpose
     100                 :            :  *                      such as satisfying allocations for a specific
     101                 :            :  *                      processor. Objects may be freed in the slab while
     102                 :            :  *                      it is frozen but slab_free will then skip the usual
     103                 :            :  *                      list operations. It is up to the processor holding
     104                 :            :  *                      the slab to integrate the slab into the slab lists
     105                 :            :  *                      when the slab is no longer needed.
     106                 :            :  *
     107                 :            :  *                      One use of this flag is to mark slabs that are
     108                 :            :  *                      used for allocations. Then such a slab becomes a cpu
     109                 :            :  *                      slab. The cpu slab may be equipped with an additional
     110                 :            :  *                      freelist that allows lockless access to
     111                 :            :  *                      free objects in addition to the regular freelist
     112                 :            :  *                      that requires the slab lock.
     113                 :            :  *
     114                 :            :  * PageError            Slab requires special handling due to debug
     115                 :            :  *                      options set. This moves slab handling out of
     116                 :            :  *                      the fast path and disables lockless freelists.
     117                 :            :  */
     118                 :            : 
     119                 :            : static inline int kmem_cache_debug(struct kmem_cache *s)
     120                 :            : {
     121                 :            : #ifdef CONFIG_SLUB_DEBUG
     122                 :          3 :         return unlikely(s->flags & SLAB_DEBUG_FLAGS);
     123                 :            : #else
     124                 :            :         return 0;
     125                 :            : #endif
     126                 :            : }
     127                 :            : 
     128                 :          0 : void *fixup_red_left(struct kmem_cache *s, void *p)
     129                 :            : {
     130                 :          3 :         if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
     131                 :          0 :                 p += s->red_left_pad;
     132                 :            : 
     133                 :          0 :         return p;
     134                 :            : }
     135                 :            : 
     136                 :            : static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
     137                 :            : {
     138                 :            : #ifdef CONFIG_SLUB_CPU_PARTIAL
     139                 :            :         return !kmem_cache_debug(s);
     140                 :            : #else
     141                 :            :         return false;
     142                 :            : #endif
     143                 :            : }
     144                 :            : 
     145                 :            : /*
     146                 :            :  * Issues still to be resolved:
     147                 :            :  *
     148                 :            :  * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
     149                 :            :  *
     150                 :            :  * - Variable sizing of the per node arrays
     151                 :            :  */
     152                 :            : 
     153                 :            : /* Enable to test recovery from slab corruption on boot */
     154                 :            : #undef SLUB_RESILIENCY_TEST
     155                 :            : 
     156                 :            : /* Enable to log cmpxchg failures */
     157                 :            : #undef SLUB_DEBUG_CMPXCHG
     158                 :            : 
     159                 :            : /*
     160                 :            :  * Minimum number of partial slabs. These will be left on the partial
     161                 :            :  * lists even if they are empty. kmem_cache_shrink may reclaim them.
     162                 :            :  */
     163                 :            : #define MIN_PARTIAL 5
     164                 :            : 
     165                 :            : /*
     166                 :            :  * Maximum number of desirable partial slabs.
     167                 :            :  * The existence of more partial slabs makes kmem_cache_shrink
     168                 :            :  * sort the partial list by the number of objects in use.
     169                 :            :  */
     170                 :            : #define MAX_PARTIAL 10
     171                 :            : 
     172                 :            : #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
     173                 :            :                                 SLAB_POISON | SLAB_STORE_USER)
     174                 :            : 
     175                 :            : /*
     176                 :            :  * These debug flags cannot use CMPXCHG because there might be consistency
     177                 :            :  * issues when checking or reading debug information
     178                 :            :  */
     179                 :            : #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
     180                 :            :                                 SLAB_TRACE)
     181                 :            : 
     182                 :            : 
     183                 :            : /*
     184                 :            :  * Debugging flags that require metadata to be stored in the slab.  These get
     185                 :            :  * disabled when slub_debug=O is used and a cache's min order increases with
     186                 :            :  * metadata.
     187                 :            :  */
     188                 :            : #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
     189                 :            : 
     190                 :            : #define OO_SHIFT        16
     191                 :            : #define OO_MASK         ((1 << OO_SHIFT) - 1)
     192                 :            : #define MAX_OBJS_PER_PAGE       32767 /* since page.objects is u15 */
     193                 :            : 
     194                 :            : /* Internal SLUB flags */
     195                 :            : /* Poison object */
     196                 :            : #define __OBJECT_POISON         ((slab_flags_t __force)0x80000000U)
     197                 :            : /* Use cmpxchg_double */
     198                 :            : #define __CMPXCHG_DOUBLE        ((slab_flags_t __force)0x40000000U)
     199                 :            : 
     200                 :            : /*
     201                 :            :  * Tracking user of a slab.
     202                 :            :  */
     203                 :            : #define TRACK_ADDRS_COUNT 16
     204                 :            : struct track {
     205                 :            :         unsigned long addr;     /* Called from address */
     206                 :            : #ifdef CONFIG_STACKTRACE
     207                 :            :         unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
     208                 :            : #endif
     209                 :            :         int cpu;                /* Was running on cpu */
     210                 :            :         int pid;                /* Pid context */
     211                 :            :         unsigned long when;     /* When did the operation occur */
     212                 :            : };
     213                 :            : 
     214                 :            : enum track_item { TRACK_ALLOC, TRACK_FREE };
     215                 :            : 
     216                 :            : #ifdef CONFIG_SYSFS
     217                 :            : static int sysfs_slab_add(struct kmem_cache *);
     218                 :            : static int sysfs_slab_alias(struct kmem_cache *, const char *);
     219                 :            : static void memcg_propagate_slab_attrs(struct kmem_cache *s);
     220                 :            : static void sysfs_slab_remove(struct kmem_cache *s);
     221                 :            : #else
     222                 :            : static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
     223                 :            : static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
     224                 :            :                                                         { return 0; }
     225                 :            : static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
     226                 :            : static inline void sysfs_slab_remove(struct kmem_cache *s) { }
     227                 :            : #endif
     228                 :            : 
     229                 :            : static inline void stat(const struct kmem_cache *s, enum stat_item si)
     230                 :            : {
     231                 :            : #ifdef CONFIG_SLUB_STATS
     232                 :            :         /*
     233                 :            :          * The rmw is racy on a preemptible kernel but this is acceptable, so
     234                 :            :          * avoid this_cpu_add()'s irq-disable overhead.
     235                 :            :          */
     236                 :            :         raw_cpu_inc(s->cpu_slab->stat[si]);
     237                 :            : #endif
     238                 :            : }
     239                 :            : 
     240                 :            : /********************************************************************
     241                 :            :  *                      Core slab cache functions
     242                 :            :  *******************************************************************/
     243                 :            : 
     244                 :            : /*
     245                 :            :  * Returns freelist pointer (ptr). With hardening, this is obfuscated
     246                 :            :  * with an XOR of the address where the pointer is held and a per-cache
     247                 :            :  * random number.
     248                 :            :  */
     249                 :            : static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
     250                 :            :                                  unsigned long ptr_addr)
     251                 :            : {
     252                 :            : #ifdef CONFIG_SLAB_FREELIST_HARDENED
     253                 :            :         /*
     254                 :            :          * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
     255                 :            :          * Normally, this doesn't cause any issues, as both set_freepointer()
     256                 :            :          * and get_freepointer() are called with a pointer with the same tag.
     257                 :            :          * However, there are some issues with CONFIG_SLUB_DEBUG code. For
     258                 :            :          * example, when __free_slub() iterates over objects in a cache, it
     259                 :            :          * passes untagged pointers to check_object(). check_object() in turn
     260                 :            :          * calls get_freepointer() with an untagged pointer, which causes the
     261                 :            :          * freepointer to be restored incorrectly.
     262                 :            :          */
     263                 :            :         return (void *)((unsigned long)ptr ^ s->random ^
     264                 :            :                         swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
     265                 :            : #else
     266                 :            :         return ptr;
     267                 :            : #endif
     268                 :            : }
     269                 :            : 
     270                 :            : /* Returns the freelist pointer recorded at location ptr_addr. */
     271                 :            : static inline void *freelist_dereference(const struct kmem_cache *s,
     272                 :            :                                          void *ptr_addr)
     273                 :            : {
     274                 :          3 :         return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
     275                 :            :                             (unsigned long)ptr_addr);
     276                 :            : }
     277                 :            : 
     278                 :            : static inline void *get_freepointer(struct kmem_cache *s, void *object)
     279                 :            : {
     280                 :          3 :         return freelist_dereference(s, object + s->offset);
     281                 :            : }
     282                 :            : 
     283                 :          3 : static void prefetch_freepointer(const struct kmem_cache *s, void *object)
     284                 :            : {
     285                 :          3 :         prefetch(object + s->offset);
     286                 :          3 : }
     287                 :            : 
     288                 :          3 : static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
     289                 :            : {
     290                 :            :         unsigned long freepointer_addr;
     291                 :            :         void *p;
     292                 :            : 
     293                 :            :         if (!debug_pagealloc_enabled_static())
     294                 :            :                 return get_freepointer(s, object);
     295                 :            : 
     296                 :            :         freepointer_addr = (unsigned long)object + s->offset;
     297                 :            :         probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
     298                 :            :         return freelist_ptr(s, p, freepointer_addr);
     299                 :            : }
     300                 :            : 
     301                 :          3 : static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
     302                 :            : {
     303                 :          3 :         unsigned long freeptr_addr = (unsigned long)object + s->offset;
     304                 :            : 
     305                 :            : #ifdef CONFIG_SLAB_FREELIST_HARDENED
     306                 :            :         BUG_ON(object == fp); /* naive detection of double free or corruption */
     307                 :            : #endif
     308                 :            : 
     309                 :          3 :         *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
     310                 :          3 : }
     311                 :            : 
     312                 :            : /* Loop over all objects in a slab */
     313                 :            : #define for_each_object(__p, __s, __addr, __objects) \
     314                 :            :         for (__p = fixup_red_left(__s, __addr); \
     315                 :            :                 __p < (__addr) + (__objects) * (__s)->size; \
     316                 :            :                 __p += (__s)->size)
     317                 :            : 
     318                 :            : /* Determine object index from a given position */
     319                 :            : static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
     320                 :            : {
     321                 :          0 :         return (kasan_reset_tag(p) - addr) / s->size;
     322                 :            : }
     323                 :            : 
     324                 :            : static inline unsigned int order_objects(unsigned int order, unsigned int size)
     325                 :            : {
     326                 :          3 :         return ((unsigned int)PAGE_SIZE << order) / size;
     327                 :            : }
     328                 :            : 
     329                 :            : static inline struct kmem_cache_order_objects oo_make(unsigned int order,
     330                 :            :                 unsigned int size)
     331                 :            : {
     332                 :            :         struct kmem_cache_order_objects x = {
     333                 :          3 :                 (order << OO_SHIFT) + order_objects(order, size)
     334                 :            :         };
     335                 :            : 
     336                 :            :         return x;
     337                 :            : }
     338                 :            : 
     339                 :            : static inline unsigned int oo_order(struct kmem_cache_order_objects x)
     340                 :            : {
     341                 :          3 :         return x.x >> OO_SHIFT;
     342                 :            : }
     343                 :            : 
     344                 :            : static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
     345                 :            : {
     346                 :          3 :         return x.x & OO_MASK;
     347                 :            : }
     348                 :            : 
     349                 :            : /*
     350                 :            :  * Per slab locking using the pagelock
     351                 :            :  */
     352                 :            : static __always_inline void slab_lock(struct page *page)
     353                 :            : {
     354                 :            :         VM_BUG_ON_PAGE(PageTail(page), page);
     355                 :          3 :         bit_spin_lock(PG_locked, &page->flags);
     356                 :            : }
     357                 :            : 
     358                 :            : static __always_inline void slab_unlock(struct page *page)
     359                 :            : {
     360                 :            :         VM_BUG_ON_PAGE(PageTail(page), page);
     361                 :          3 :         __bit_spin_unlock(PG_locked, &page->flags);
     362                 :            : }
     363                 :            : 
     364                 :            : /* Interrupts must be disabled (for the fallback code to work right) */
     365                 :          3 : static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
     366                 :            :                 void *freelist_old, unsigned long counters_old,
     367                 :            :                 void *freelist_new, unsigned long counters_new,
     368                 :            :                 const char *n)
     369                 :            : {
     370                 :            :         VM_BUG_ON(!irqs_disabled());
     371                 :            : #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     372                 :            :     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
     373                 :            :         if (s->flags & __CMPXCHG_DOUBLE) {
     374                 :            :                 if (cmpxchg_double(&page->freelist, &page->counters,
     375                 :            :                                    freelist_old, counters_old,
     376                 :            :                                    freelist_new, counters_new))
     377                 :            :                         return true;
     378                 :            :         } else
     379                 :            : #endif
     380                 :            :         {
     381                 :            :                 slab_lock(page);
     382                 :          3 :                 if (page->freelist == freelist_old &&
     383                 :          3 :                                         page->counters == counters_old) {
     384                 :          3 :                         page->freelist = freelist_new;
     385                 :          3 :                         page->counters = counters_new;
     386                 :            :                         slab_unlock(page);
     387                 :          3 :                         return true;
     388                 :            :                 }
     389                 :            :                 slab_unlock(page);
     390                 :            :         }
     391                 :            : 
     392                 :          3 :         cpu_relax();
     393                 :            :         stat(s, CMPXCHG_DOUBLE_FAIL);
     394                 :            : 
     395                 :            : #ifdef SLUB_DEBUG_CMPXCHG
     396                 :            :         pr_info("%s %s: cmpxchg double redo ", n, s->name);
     397                 :            : #endif
     398                 :            : 
     399                 :          3 :         return false;
     400                 :            : }
     401                 :            : 
     402                 :          3 : static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
     403                 :            :                 void *freelist_old, unsigned long counters_old,
     404                 :            :                 void *freelist_new, unsigned long counters_new,
     405                 :            :                 const char *n)
     406                 :            : {
     407                 :            : #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     408                 :            :     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
     409                 :            :         if (s->flags & __CMPXCHG_DOUBLE) {
     410                 :            :                 if (cmpxchg_double(&page->freelist, &page->counters,
     411                 :            :                                    freelist_old, counters_old,
     412                 :            :                                    freelist_new, counters_new))
     413                 :            :                         return true;
     414                 :            :         } else
     415                 :            : #endif
     416                 :            :         {
     417                 :            :                 unsigned long flags;
     418                 :            : 
     419                 :          3 :                 local_irq_save(flags);
     420                 :            :                 slab_lock(page);
     421                 :          3 :                 if (page->freelist == freelist_old &&
     422                 :          3 :                                         page->counters == counters_old) {
     423                 :          3 :                         page->freelist = freelist_new;
     424                 :          3 :                         page->counters = counters_new;
     425                 :            :                         slab_unlock(page);
     426                 :          3 :                         local_irq_restore(flags);
     427                 :            :                         return true;
     428                 :            :                 }
     429                 :            :                 slab_unlock(page);
     430                 :          3 :                 local_irq_restore(flags);
     431                 :            :         }
     432                 :            : 
     433                 :          3 :         cpu_relax();
     434                 :            :         stat(s, CMPXCHG_DOUBLE_FAIL);
     435                 :            : 
     436                 :            : #ifdef SLUB_DEBUG_CMPXCHG
     437                 :            :         pr_info("%s %s: cmpxchg double redo ", n, s->name);
     438                 :            : #endif
     439                 :            : 
     440                 :          3 :         return false;
     441                 :            : }
     442                 :            : 
     443                 :            : #ifdef CONFIG_SLUB_DEBUG
     444                 :            : /*
     445                 :            :  * Set a bit in the map for each object on the page's freelist (each free object).
     446                 :            :  *
     447                 :            :  * Node listlock must be held to guarantee that the page does
     448                 :            :  * not vanish from under us.
     449                 :            :  */
     450                 :          0 : static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
     451                 :            : {
     452                 :            :         void *p;
     453                 :            :         void *addr = page_address(page);
     454                 :            : 
     455                 :          0 :         for (p = page->freelist; p; p = get_freepointer(s, p))
     456                 :          0 :                 set_bit(slab_index(p, s, addr), map);
     457                 :          0 : }
     458                 :            : 
     459                 :            : static inline unsigned int size_from_object(struct kmem_cache *s)
     460                 :            : {
     461                 :          0 :         if (s->flags & SLAB_RED_ZONE)
     462                 :          0 :                 return s->size - s->red_left_pad;
     463                 :            : 
     464                 :          0 :         return s->size;
     465                 :            : }
     466                 :            : 
     467                 :            : static inline void *restore_red_left(struct kmem_cache *s, void *p)
     468                 :            : {
     469                 :          0 :         if (s->flags & SLAB_RED_ZONE)
     470                 :          0 :                 p -= s->red_left_pad;
     471                 :            : 
     472                 :            :         return p;
     473                 :            : }
     474                 :            : 
     475                 :            : /*
     476                 :            :  * Debug settings:
     477                 :            :  */
     478                 :            : #if defined(CONFIG_SLUB_DEBUG_ON)
     479                 :            : static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
     480                 :            : #else
     481                 :            : static slab_flags_t slub_debug;
     482                 :            : #endif
     483                 :            : 
     484                 :            : static char *slub_debug_slabs;
     485                 :            : static int disable_higher_order_debug;
     486                 :            : 
     487                 :            : /*
     488                 :            :  * slub is about to manipulate internal object metadata.  This memory lies
     489                 :            :  * outside the range of the allocated object, so accessing it would normally
     490                 :            :  * be reported by kasan as a bounds error.  metadata_access_enable() is used
     491                 :            :  * to tell kasan that these accesses are OK.
     492                 :            :  */
     493                 :            : static inline void metadata_access_enable(void)
     494                 :            : {
     495                 :            :         kasan_disable_current();
     496                 :            : }
     497                 :            : 
     498                 :            : static inline void metadata_access_disable(void)
     499                 :            : {
     500                 :            :         kasan_enable_current();
     501                 :            : }
     502                 :            : 
     503                 :            : /*
     504                 :            :  * Object debugging
     505                 :            :  */
     506                 :            : 
     507                 :            : /* Verify that a pointer has an address that is valid within a slab page */
     508                 :          0 : static inline int check_valid_pointer(struct kmem_cache *s,
     509                 :            :                                 struct page *page, void *object)
     510                 :            : {
     511                 :            :         void *base;
     512                 :            : 
     513                 :          0 :         if (!object)
     514                 :            :                 return 1;
     515                 :            : 
     516                 :            :         base = page_address(page);
     517                 :            :         object = kasan_reset_tag(object);
     518                 :            :         object = restore_red_left(s, object);
     519                 :          0 :         if (object < base || object >= base + page->objects * s->size ||
     520                 :          0 :                 (object - base) % s->size) {
     521                 :            :                 return 0;
     522                 :            :         }
     523                 :            : 
     524                 :          0 :         return 1;
     525                 :            : }
     526                 :            : 
     527                 :            : static void print_section(char *level, char *text, u8 *addr,
     528                 :            :                           unsigned int length)
     529                 :            : {
     530                 :            :         metadata_access_enable();
     531                 :          0 :         print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
     532                 :            :                         length, 1);
     533                 :            :         metadata_access_disable();
     534                 :            : }
     535                 :            : 
     536                 :            : static struct track *get_track(struct kmem_cache *s, void *object,
     537                 :            :         enum track_item alloc)
     538                 :            : {
     539                 :            :         struct track *p;
     540                 :            : 
     541                 :          0 :         if (s->offset)
     542                 :          0 :                 p = object + s->offset + sizeof(void *);
     543                 :            :         else
     544                 :          0 :                 p = object + s->inuse;
     545                 :            : 
     546                 :          0 :         return p + alloc;
     547                 :            : }
     548                 :            : 
     549                 :          0 : static void set_track(struct kmem_cache *s, void *object,
     550                 :            :                         enum track_item alloc, unsigned long addr)
     551                 :            : {
     552                 :            :         struct track *p = get_track(s, object, alloc);
     553                 :            : 
     554                 :          0 :         if (addr) {
     555                 :            : #ifdef CONFIG_STACKTRACE
     556                 :            :                 unsigned int nr_entries;
     557                 :            : 
     558                 :            :                 metadata_access_enable();
     559                 :          0 :                 nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
     560                 :            :                 metadata_access_disable();
     561                 :            : 
     562                 :          0 :                 if (nr_entries < TRACK_ADDRS_COUNT)
     563                 :          0 :                         p->addrs[nr_entries] = 0;
     564                 :            : #endif
     565                 :          0 :                 p->addr = addr;
     566                 :          0 :                 p->cpu = smp_processor_id();
     567                 :          0 :                 p->pid = current->pid;
     568                 :          0 :                 p->when = jiffies;
     569                 :            :         } else {
     570                 :          0 :                 memset(p, 0, sizeof(struct track));
     571                 :            :         }
     572                 :          0 : }
     573                 :            : 
     574                 :          3 : static void init_tracking(struct kmem_cache *s, void *object)
     575                 :            : {
     576                 :          3 :         if (!(s->flags & SLAB_STORE_USER))
     577                 :          3 :                 return;
     578                 :            : 
     579                 :          0 :         set_track(s, object, TRACK_FREE, 0UL);
     580                 :          0 :         set_track(s, object, TRACK_ALLOC, 0UL);
     581                 :            : }
     582                 :            : 
     583                 :          0 : static void print_track(const char *s, struct track *t, unsigned long pr_time)
     584                 :            : {
     585                 :          0 :         if (!t->addr)
     586                 :          0 :                 return;
     587                 :            : 
     588                 :          0 :         pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
     589                 :            :                s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
     590                 :            : #ifdef CONFIG_STACKTRACE
     591                 :            :         {
     592                 :            :                 int i;
     593                 :          0 :                 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
     594                 :          0 :                         if (t->addrs[i])
     595                 :          0 :                                 pr_err("\t%pS\n", (void *)t->addrs[i]);
     596                 :            :                         else
     597                 :            :                                 break;
     598                 :            :         }
     599                 :            : #endif
     600                 :            : }
     601                 :            : 
     602                 :          0 : static void print_tracking(struct kmem_cache *s, void *object)
     603                 :            : {
     604                 :          0 :         unsigned long pr_time = jiffies;
     605                 :          0 :         if (!(s->flags & SLAB_STORE_USER))
     606                 :          0 :                 return;
     607                 :            : 
     608                 :          0 :         print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
     609                 :          0 :         print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
     610                 :            : }
     611                 :            : 
     612                 :          0 : static void print_page_info(struct page *page)
     613                 :            : {
     614                 :          0 :         pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
     615                 :            :                page, page->objects, page->inuse, page->freelist, page->flags);
     616                 :            : 
     617                 :          0 : }
     618                 :            : 
     619                 :          0 : static void slab_bug(struct kmem_cache *s, char *fmt, ...)
     620                 :            : {
     621                 :            :         struct va_format vaf;
     622                 :            :         va_list args;
     623                 :            : 
     624                 :          0 :         va_start(args, fmt);
     625                 :          0 :         vaf.fmt = fmt;
     626                 :          0 :         vaf.va = &args;
     627                 :          0 :         pr_err("=============================================================================\n");
     628                 :          0 :         pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
     629                 :          0 :         pr_err("-----------------------------------------------------------------------------\n\n");
     630                 :            : 
     631                 :          0 :         add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
     632                 :          0 :         va_end(args);
     633                 :          0 : }
     634                 :            : 
     635                 :          0 : static void slab_fix(struct kmem_cache *s, char *fmt, ...)
     636                 :            : {
     637                 :            :         struct va_format vaf;
     638                 :            :         va_list args;
     639                 :            : 
     640                 :          0 :         va_start(args, fmt);
     641                 :          0 :         vaf.fmt = fmt;
     642                 :          0 :         vaf.va = &args;
     643                 :          0 :         pr_err("FIX %s: %pV\n", s->name, &vaf);
     644                 :          0 :         va_end(args);
     645                 :          0 : }
     646                 :            : 
     647                 :          3 : static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
     648                 :            :                                void *freelist, void *nextfree)
     649                 :            : {
     650                 :          3 :         if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
     651                 :          0 :             !check_valid_pointer(s, page, nextfree)) {
     652                 :            :                 object_err(s, page, freelist, "Freechain corrupt");
     653                 :            :                 freelist = NULL;
     654                 :          0 :                 slab_fix(s, "Isolate corrupted freechain");
     655                 :          0 :                 return true;
     656                 :            :         }
     657                 :            : 
     658                 :            :         return false;
     659                 :            : }
     660                 :            : 
     661                 :          0 : static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
     662                 :            : {
     663                 :            :         unsigned int off;       /* Offset of last byte */
     664                 :            :         u8 *addr = page_address(page);
     665                 :            : 
     666                 :          0 :         print_tracking(s, p);
     667                 :            : 
     668                 :          0 :         print_page_info(page);
     669                 :            : 
     670                 :          0 :         pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
     671                 :            :                p, p - addr, get_freepointer(s, p));
     672                 :            : 
     673                 :          0 :         if (s->flags & SLAB_RED_ZONE)
     674                 :          0 :                 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
     675                 :            :                               s->red_left_pad);
     676                 :          0 :         else if (p > addr + 16)
     677                 :          0 :                 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
     678                 :            : 
     679                 :            :         print_section(KERN_ERR, "Object ", p,
     680                 :          0 :                       min_t(unsigned int, s->object_size, PAGE_SIZE));
     681                 :          0 :         if (s->flags & SLAB_RED_ZONE)
     682                 :          0 :                 print_section(KERN_ERR, "Redzone ", p + s->object_size,
     683                 :          0 :                         s->inuse - s->object_size);
     684                 :            : 
     685                 :          0 :         if (s->offset)
     686                 :          0 :                 off = s->offset + sizeof(void *);
     687                 :            :         else
     688                 :          0 :                 off = s->inuse;
     689                 :            : 
     690                 :          0 :         if (s->flags & SLAB_STORE_USER)
     691                 :          0 :                 off += 2 * sizeof(struct track);
     692                 :            : 
     693                 :            :         off += kasan_metadata_size(s);
     694                 :            : 
     695                 :          0 :         if (off != size_from_object(s))
     696                 :            :                 /* Beginning of the filler is the free pointer */
     697                 :          0 :                 print_section(KERN_ERR, "Padding ", p + off,
     698                 :            :                               size_from_object(s) - off);
     699                 :            : 
     700                 :          0 :         dump_stack();
     701                 :          0 : }
     702                 :            : 
     703                 :          0 : void object_err(struct kmem_cache *s, struct page *page,
     704                 :            :                         u8 *object, char *reason)
     705                 :            : {
     706                 :          0 :         slab_bug(s, "%s", reason);
     707                 :          0 :         print_trailer(s, page, object);
     708                 :          0 : }
     709                 :            : 
     710                 :          0 : static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
     711                 :            :                         const char *fmt, ...)
     712                 :            : {
     713                 :            :         va_list args;
     714                 :            :         char buf[100];
     715                 :            : 
     716                 :          0 :         va_start(args, fmt);
     717                 :          0 :         vsnprintf(buf, sizeof(buf), fmt, args);
     718                 :          0 :         va_end(args);
     719                 :          0 :         slab_bug(s, "%s", buf);
     720                 :          0 :         print_page_info(page);
     721                 :          0 :         dump_stack();
     722                 :          0 : }
     723                 :            : 
     724                 :          3 : static void init_object(struct kmem_cache *s, void *object, u8 val)
     725                 :            : {
     726                 :            :         u8 *p = object;
     727                 :            : 
     728                 :          3 :         if (s->flags & SLAB_RED_ZONE)
     729                 :          0 :                 memset(p - s->red_left_pad, val, s->red_left_pad);
     730                 :            : 
     731                 :          3 :         if (s->flags & __OBJECT_POISON) {
     732                 :          0 :                 memset(p, POISON_FREE, s->object_size - 1);
     733                 :          0 :                 p[s->object_size - 1] = POISON_END;
     734                 :            :         }
     735                 :            : 
     736                 :          3 :         if (s->flags & SLAB_RED_ZONE)
     737                 :          0 :                 memset(p + s->object_size, val, s->inuse - s->object_size);
     738                 :          3 : }
     739                 :            : 
     740                 :          0 : static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
     741                 :            :                                                 void *from, void *to)
     742                 :            : {
     743                 :          0 :         slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
     744                 :          0 :         memset(from, data, to - from);
     745                 :          0 : }
     746                 :            : 
     747                 :          0 : static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
     748                 :            :                         u8 *object, char *what,
     749                 :            :                         u8 *start, unsigned int value, unsigned int bytes)
     750                 :            : {
     751                 :            :         u8 *fault;
     752                 :            :         u8 *end;
     753                 :            : 
     754                 :            :         metadata_access_enable();
     755                 :          0 :         fault = memchr_inv(start, value, bytes);
     756                 :            :         metadata_access_disable();
     757                 :          0 :         if (!fault)
     758                 :            :                 return 1;
     759                 :            : 
     760                 :          0 :         end = start + bytes;
     761                 :          0 :         while (end > fault && end[-1] == value)
     762                 :          0 :                 end--;
     763                 :            : 
     764                 :          0 :         slab_bug(s, "%s overwritten", what);
     765                 :          0 :         pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
     766                 :            :                                         fault, end - 1, fault[0], value);
     767                 :          0 :         print_trailer(s, page, object);
     768                 :            : 
     769                 :          0 :         restore_bytes(s, what, value, fault, end);
     770                 :          0 :         return 0;
     771                 :            : }
     772                 :            : 
     773                 :            : /*
     774                 :            :  * Object layout:
     775                 :            :  *
     776                 :            :  * object address
     777                 :            :  *      Bytes of the object to be managed.
     778                 :            :  *      If the freepointer may overlay the object then the free
     779                 :            :  *      pointer is the first word of the object.
     780                 :            :  *
     781                 :            :  *      Poisoning uses 0x6b (POISON_FREE) and the last byte is
     782                 :            :  *      0xa5 (POISON_END)
     783                 :            :  *
     784                 :            :  * object + s->object_size
     785                 :            :  *      Padding to reach word boundary. This is also used for Redzoning.
     786                 :            :  *      Padding is extended by another word if Redzoning is enabled and
     787                 :            :  *      object_size == inuse.
     788                 :            :  *
     789                 :            :  *      We fill with 0xbb (RED_INACTIVE) for inactive objects and with
     790                 :            :  *      0xcc (RED_ACTIVE) for objects in use.
     791                 :            :  *
     792                 :            :  * object + s->inuse
     793                 :            :  *      Meta data starts here.
     794                 :            :  *
     795                 :            :  *      A. Free pointer (if we cannot overwrite object on free)
     796                 :            :  *      B. Tracking data for SLAB_STORE_USER
     797                 :            :  *      C. Padding to reach required alignment boundary or at minimum
     798                 :            :  *              one word if debugging is on to be able to detect writes
     799                 :            :  *              before the word boundary.
     800                 :            :  *
     801                 :            :  *      Padding is done using 0x5a (POISON_INUSE)
     802                 :            :  *
     803                 :            :  * object + s->size
     804                 :            :  *      Nothing is used beyond s->size.
     805                 :            :  *
     806                 :            :  * If slabcaches are merged then the object_size and inuse boundaries are mostly
     807                 :            :  * ignored. And therefore no slab options that rely on these boundaries
     808                 :            :  * may be used with merged slabcaches.
     809                 :            :  */
     810                 :            : 
     811                 :          0 : static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
     812                 :            : {
     813                 :          0 :         unsigned long off = s->inuse;        /* The end of info */
     814                 :            : 
     815                 :          0 :         if (s->offset)
     816                 :            :                 /* Freepointer is placed after the object. */
     817                 :          0 :                 off += sizeof(void *);
     818                 :            : 
     819                 :          0 :         if (s->flags & SLAB_STORE_USER)
     820                 :            :                 /* We also have user information there */
     821                 :          0 :                 off += 2 * sizeof(struct track);
     822                 :            : 
     823                 :            :         off += kasan_metadata_size(s);
     824                 :            : 
     825                 :          0 :         if (size_from_object(s) == off)
     826                 :            :                 return 1;
     827                 :            : 
     828                 :          0 :         return check_bytes_and_report(s, page, p, "Object padding",
     829                 :          0 :                         p + off, POISON_INUSE, size_from_object(s) - off);
     830                 :            : }
     831                 :            : 
     832                 :            : /* Check the pad bytes at the end of a slab page */
     833                 :          0 : static int slab_pad_check(struct kmem_cache *s, struct page *page)
     834                 :            : {
     835                 :            :         u8 *start;
     836                 :            :         u8 *fault;
     837                 :            :         u8 *end;
     838                 :            :         u8 *pad;
     839                 :            :         int length;
     840                 :            :         int remainder;
     841                 :            : 
     842                 :          0 :         if (!(s->flags & SLAB_POISON))
     843                 :            :                 return 1;
     844                 :            : 
     845                 :            :         start = page_address(page);
     846                 :            :         length = page_size(page);
     847                 :          0 :         end = start + length;
     848                 :          0 :         remainder = length % s->size;
     849                 :          0 :         if (!remainder)
     850                 :            :                 return 1;
     851                 :            : 
     852                 :          0 :         pad = end - remainder;
     853                 :            :         metadata_access_enable();
     854                 :          0 :         fault = memchr_inv(pad, POISON_INUSE, remainder);
     855                 :            :         metadata_access_disable();
     856                 :          0 :         if (!fault)
     857                 :            :                 return 1;
     858                 :          0 :         while (end > fault && end[-1] == POISON_INUSE)
     859                 :          0 :                 end--;
     860                 :            : 
     861                 :          0 :         slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
     862                 :            :         print_section(KERN_ERR, "Padding ", pad, remainder);
     863                 :            : 
     864                 :          0 :         restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
     865                 :          0 :         return 0;
     866                 :            : }
     867                 :            : 
     868                 :          0 : static int check_object(struct kmem_cache *s, struct page *page,
     869                 :            :                                         void *object, u8 val)
     870                 :            : {
     871                 :            :         u8 *p = object;
     872                 :          0 :         u8 *endobject = object + s->object_size;
     873                 :            : 
     874                 :          0 :         if (s->flags & SLAB_RED_ZONE) {
     875                 :          0 :                 if (!check_bytes_and_report(s, page, object, "Redzone",
     876                 :          0 :                         object - s->red_left_pad, val, s->red_left_pad))
     877                 :            :                         return 0;
     878                 :            : 
     879                 :          0 :                 if (!check_bytes_and_report(s, page, object, "Redzone",
     880                 :          0 :                         endobject, val, s->inuse - s->object_size))
     881                 :            :                         return 0;
     882                 :            :         } else {
     883                 :          0 :                 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
     884                 :          0 :                         check_bytes_and_report(s, page, p, "Alignment padding",
     885                 :            :                                 endobject, POISON_INUSE,
     886                 :            :                                 s->inuse - s->object_size);
     887                 :            :                 }
     888                 :            :         }
     889                 :            : 
     890                 :          0 :         if (s->flags & SLAB_POISON) {
     891                 :          0 :                 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
     892                 :          0 :                         (!check_bytes_and_report(s, page, p, "Poison", p,
     893                 :          0 :                                         POISON_FREE, s->object_size - 1) ||
     894                 :          0 :                          !check_bytes_and_report(s, page, p, "Poison",
     895                 :          0 :                                 p + s->object_size - 1, POISON_END, 1)))
     896                 :            :                         return 0;
     897                 :            :                 /*
     898                 :            :                  * check_pad_bytes cleans up on its own.
     899                 :            :                  */
     900                 :          0 :                 check_pad_bytes(s, page, p);
     901                 :            :         }
     902                 :            : 
     903                 :          0 :         if (!s->offset && val == SLUB_RED_ACTIVE)
     904                 :            :                 /*
     905                 :            :                  * Object and freepointer overlap. Cannot check
     906                 :            :                  * freepointer while object is allocated.
     907                 :            :                  */
     908                 :            :                 return 1;
     909                 :            : 
     910                 :            :         /* Check free pointer validity */
     911                 :          0 :         if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
     912                 :            :                 object_err(s, page, p, "Freepointer corrupt");
     913                 :            :                 /*
     914                 :            :                  * No choice but to zap it and thus lose the remainder
     915                 :            :                  * of the free objects in this slab. May cause
     916                 :            :                  * another error because the object count is now wrong.
     917                 :            :                  */
     918                 :            :                 set_freepointer(s, p, NULL);
     919                 :          0 :                 return 0;
     920                 :            :         }
     921                 :            :         return 1;
     922                 :            : }
     923                 :            : 
     924                 :          0 : static int check_slab(struct kmem_cache *s, struct page *page)
     925                 :            : {
     926                 :            :         int maxobj;
     927                 :            : 
     928                 :            :         VM_BUG_ON(!irqs_disabled());
     929                 :            : 
     930                 :          0 :         if (!PageSlab(page)) {
     931                 :          0 :                 slab_err(s, page, "Not a valid slab page");
     932                 :          0 :                 return 0;
     933                 :            :         }
     934                 :            : 
     935                 :          0 :         maxobj = order_objects(compound_order(page), s->size);
     936                 :          0 :         if (page->objects > maxobj) {
     937                 :          0 :                 slab_err(s, page, "objects %u > max %u",
     938                 :            :                         page->objects, maxobj);
     939                 :          0 :                 return 0;
     940                 :            :         }
     941                 :          0 :         if (page->inuse > page->objects) {
     942                 :          0 :                 slab_err(s, page, "inuse %u > max %u",
     943                 :            :                         page->inuse, page->objects);
     944                 :          0 :                 return 0;
     945                 :            :         }
     946                 :            :         /* Slab_pad_check fixes things up after itself */
     947                 :          0 :         slab_pad_check(s, page);
     948                 :          0 :         return 1;
     949                 :            : }
     950                 :            : 
     951                 :            : /*
     952                 :            :  * Determine if a certain object on a page is on the freelist. Must hold the
     953                 :            :  * slab lock to guarantee that the chains are in a consistent state.
     954                 :            :  */
     955                 :          0 : static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
     956                 :            : {
     957                 :            :         int nr = 0;
     958                 :            :         void *fp;
     959                 :            :         void *object = NULL;
     960                 :            :         int max_objects;
     961                 :            : 
     962                 :          0 :         fp = page->freelist;
     963                 :          0 :         while (fp && nr <= page->objects) {
     964                 :          0 :                 if (fp == search)
     965                 :            :                         return 1;
     966                 :          0 :                 if (!check_valid_pointer(s, page, fp)) {
     967                 :          0 :                         if (object) {
     968                 :            :                                 object_err(s, page, object,
     969                 :            :                                         "Freechain corrupt");
     970                 :            :                                 set_freepointer(s, object, NULL);
     971                 :            :                         } else {
     972                 :          0 :                                 slab_err(s, page, "Freepointer corrupt");
     973                 :          0 :                                 page->freelist = NULL;
     974                 :          0 :                                 page->inuse = page->objects;
     975                 :          0 :                                 slab_fix(s, "Freelist cleared");
     976                 :          0 :                                 return 0;
     977                 :            :                         }
     978                 :            :                         break;
     979                 :            :                 }
     980                 :            :                 object = fp;
     981                 :            :                 fp = get_freepointer(s, object);
     982                 :          0 :                 nr++;
     983                 :            :         }
     984                 :            : 
     985                 :          0 :         max_objects = order_objects(compound_order(page), s->size);
     986                 :          0 :         if (max_objects > MAX_OBJS_PER_PAGE)
     987                 :            :                 max_objects = MAX_OBJS_PER_PAGE;
     988                 :            : 
     989                 :          0 :         if (page->objects != max_objects) {
     990                 :          0 :                 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
     991                 :            :                          page->objects, max_objects);
     992                 :          0 :                 page->objects = max_objects;
     993                 :          0 :                 slab_fix(s, "Number of objects adjusted.");
     994                 :            :         }
     995                 :          0 :         if (page->inuse != page->objects - nr) {
     996                 :          0 :                 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
     997                 :            :                          page->inuse, page->objects - nr);
     998                 :          0 :                 page->inuse = page->objects - nr;
     999                 :          0 :                 slab_fix(s, "Object count adjusted.");
    1000                 :            :         }
    1001                 :          0 :         return search == NULL;
    1002                 :            : }
    1003                 :            : 
    1004                 :          0 : static void trace(struct kmem_cache *s, struct page *page, void *object,
    1005                 :            :                                                                 int alloc)
    1006                 :            : {
    1007                 :          0 :         if (s->flags & SLAB_TRACE) {
    1008                 :          0 :                 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
    1009                 :            :                         s->name,
    1010                 :            :                         alloc ? "alloc" : "free",
    1011                 :            :                         object, page->inuse,
    1012                 :            :                         page->freelist);
    1013                 :            : 
    1014                 :          0 :                 if (!alloc)
    1015                 :          0 :                         print_section(KERN_INFO, "Object ", (void *)object,
    1016                 :            :                                         s->object_size);
    1017                 :            : 
    1018                 :          0 :                 dump_stack();
    1019                 :            :         }
    1020                 :          0 : }
    1021                 :            : 
    1022                 :            : /*
    1023                 :            :  * Tracking of fully allocated slabs for debugging purposes.
    1024                 :            :  */
    1025                 :            : static void add_full(struct kmem_cache *s,
    1026                 :            :         struct kmem_cache_node *n, struct page *page)
    1027                 :            : {
    1028                 :          3 :         if (!(s->flags & SLAB_STORE_USER))
    1029                 :            :                 return;
    1030                 :            : 
    1031                 :            :         lockdep_assert_held(&n->list_lock);
    1032                 :          0 :         list_add(&page->slab_list, &n->full);
    1033                 :            : }
    1034                 :            : 
    1035                 :            : static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
    1036                 :            : {
    1037                 :          0 :         if (!(s->flags & SLAB_STORE_USER))
    1038                 :            :                 return;
    1039                 :            : 
    1040                 :            :         lockdep_assert_held(&n->list_lock);
    1041                 :            :         list_del(&page->slab_list);
    1042                 :            : }
    1043                 :            : 
    1044                 :            : /* Tracking of the number of slabs for debugging purposes */
    1045                 :            : static inline unsigned long slabs_node(struct kmem_cache *s, int node)
    1046                 :            : {
    1047                 :            :         struct kmem_cache_node *n = get_node(s, node);
    1048                 :            : 
    1049                 :            :         return atomic_long_read(&n->nr_slabs);
    1050                 :            : }
    1051                 :            : 
    1052                 :            : static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
    1053                 :            : {
    1054                 :            :         return atomic_long_read(&n->nr_slabs);
    1055                 :            : }
    1056                 :            : 
    1057                 :          3 : static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
    1058                 :            : {
    1059                 :            :         struct kmem_cache_node *n = get_node(s, node);
    1060                 :            : 
    1061                 :            :         /*
    1062                 :            :          * May be called early in order to allocate a slab for the
    1063                 :            :          * kmem_cache_node structure. Solve the chicken-egg
    1064                 :            :          * dilemma by deferring the increment of the count during
    1065                 :            :          * bootstrap (see early_kmem_cache_node_alloc).
    1066                 :            :          */
    1067                 :          3 :         if (likely(n)) {
    1068                 :          3 :                 atomic_long_inc(&n->nr_slabs);
    1069                 :          3 :                 atomic_long_add(objects, &n->total_objects);
    1070                 :            :         }
    1071                 :          3 : }
    1072                 :          3 : static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
    1073                 :            : {
    1074                 :            :         struct kmem_cache_node *n = get_node(s, node);
    1075                 :            : 
    1076                 :          3 :         atomic_long_dec(&n->nr_slabs);
    1077                 :          3 :         atomic_long_sub(objects, &n->total_objects);
    1078                 :          3 : }
    1079                 :            : 
    1080                 :            : /* Object debug checks for alloc/free paths */
    1081                 :          3 : static void setup_object_debug(struct kmem_cache *s, struct page *page,
    1082                 :            :                                                                 void *object)
    1083                 :            : {
    1084                 :          3 :         if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
    1085                 :          3 :                 return;
    1086                 :            : 
    1087                 :          0 :         init_object(s, object, SLUB_RED_INACTIVE);
    1088                 :          0 :         init_tracking(s, object);
    1089                 :            : }
    1090                 :            : 
    1091                 :            : static
    1092                 :          3 : void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
    1093                 :            : {
    1094                 :          3 :         if (!(s->flags & SLAB_POISON))
    1095                 :          3 :                 return;
    1096                 :            : 
    1097                 :            :         metadata_access_enable();
    1098                 :          0 :         memset(addr, POISON_INUSE, page_size(page));
    1099                 :            :         metadata_access_disable();
    1100                 :            : }
    1101                 :            : 
    1102                 :          0 : static inline int alloc_consistency_checks(struct kmem_cache *s,
    1103                 :            :                                         struct page *page, void *object)
    1104                 :            : {
    1105                 :          0 :         if (!check_slab(s, page))
    1106                 :            :                 return 0;
    1107                 :            : 
    1108                 :          0 :         if (!check_valid_pointer(s, page, object)) {
    1109                 :            :                 object_err(s, page, object, "Freelist Pointer check fails");
    1110                 :          0 :                 return 0;
    1111                 :            :         }
    1112                 :            : 
    1113                 :          0 :         if (!check_object(s, page, object, SLUB_RED_INACTIVE))
    1114                 :            :                 return 0;
    1115                 :            : 
    1116                 :          0 :         return 1;
    1117                 :            : }
    1118                 :            : 
    1119                 :          0 : static noinline int alloc_debug_processing(struct kmem_cache *s,
    1120                 :            :                                         struct page *page,
    1121                 :            :                                         void *object, unsigned long addr)
    1122                 :            : {
    1123                 :          0 :         if (s->flags & SLAB_CONSISTENCY_CHECKS) {
    1124                 :          0 :                 if (!alloc_consistency_checks(s, page, object))
    1125                 :            :                         goto bad;
    1126                 :            :         }
    1127                 :            : 
    1128                 :            :         /* Success perform special debug activities for allocs */
    1129                 :          0 :         if (s->flags & SLAB_STORE_USER)
    1130                 :          0 :                 set_track(s, object, TRACK_ALLOC, addr);
    1131                 :          0 :         trace(s, page, object, 1);
    1132                 :          0 :         init_object(s, object, SLUB_RED_ACTIVE);
    1133                 :          0 :         return 1;
    1134                 :            : 
    1135                 :            : bad:
    1136                 :          0 :         if (PageSlab(page)) {
    1137                 :            :                 /*
    1138                 :            :                  * If this is a slab page then lets do the best we can
    1139                 :            :                  * to avoid issues in the future. Marking all objects
    1140                 :            :                  * as used avoids touching the remaining objects.
    1141                 :            :                  */
    1142                 :          0 :                 slab_fix(s, "Marking all objects used");
    1143                 :          0 :                 page->inuse = page->objects;
    1144                 :          0 :                 page->freelist = NULL;
    1145                 :            :         }
    1146                 :            :         return 0;
    1147                 :            : }
    1148                 :            : 
    1149                 :          0 : static inline int free_consistency_checks(struct kmem_cache *s,
    1150                 :            :                 struct page *page, void *object, unsigned long addr)
    1151                 :            : {
    1152                 :          0 :         if (!check_valid_pointer(s, page, object)) {
    1153                 :          0 :                 slab_err(s, page, "Invalid object pointer 0x%p", object);
    1154                 :          0 :                 return 0;
    1155                 :            :         }
    1156                 :            : 
    1157                 :          0 :         if (on_freelist(s, page, object)) {
    1158                 :            :                 object_err(s, page, object, "Object already free");
    1159                 :          0 :                 return 0;
    1160                 :            :         }
    1161                 :            : 
    1162                 :          0 :         if (!check_object(s, page, object, SLUB_RED_ACTIVE))
    1163                 :            :                 return 0;
    1164                 :            : 
    1165                 :          0 :         if (unlikely(s != page->slab_cache)) {
    1166                 :          0 :                 if (!PageSlab(page)) {
    1167                 :          0 :                         slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
    1168                 :            :                                  object);
    1169                 :          0 :                 } else if (!page->slab_cache) {
    1170                 :          0 :                         pr_err("SLUB <none>: no slab for object 0x%p.\n",
    1171                 :            :                                object);
    1172                 :          0 :                         dump_stack();
    1173                 :            :                 } else
    1174                 :            :                         object_err(s, page, object,
    1175                 :            :                                         "page slab pointer corrupt.");
    1176                 :            :                 return 0;
    1177                 :            :         }
    1178                 :            :         return 1;
    1179                 :            : }
    1180                 :            : 
    1181                 :            : /* Supports checking bulk free of a constructed freelist */
    1182                 :          0 : static noinline int free_debug_processing(
    1183                 :            :         struct kmem_cache *s, struct page *page,
    1184                 :            :         void *head, void *tail, int bulk_cnt,
    1185                 :            :         unsigned long addr)
    1186                 :            : {
    1187                 :            :         struct kmem_cache_node *n = get_node(s, page_to_nid(page));
    1188                 :            :         void *object = head;
    1189                 :            :         int cnt = 0;
    1190                 :            :         unsigned long uninitialized_var(flags);
    1191                 :            :         int ret = 0;
    1192                 :            : 
    1193                 :          0 :         spin_lock_irqsave(&n->list_lock, flags);
    1194                 :            :         slab_lock(page);
    1195                 :            : 
    1196                 :          0 :         if (s->flags & SLAB_CONSISTENCY_CHECKS) {
    1197                 :          0 :                 if (!check_slab(s, page))
    1198                 :            :                         goto out;
    1199                 :            :         }
    1200                 :            : 
    1201                 :            : next_object:
    1202                 :          0 :         cnt++;
    1203                 :            : 
    1204                 :          0 :         if (s->flags & SLAB_CONSISTENCY_CHECKS) {
    1205                 :          0 :                 if (!free_consistency_checks(s, page, object, addr))
    1206                 :            :                         goto out;
    1207                 :            :         }
    1208                 :            : 
    1209                 :          0 :         if (s->flags & SLAB_STORE_USER)
    1210                 :          0 :                 set_track(s, object, TRACK_FREE, addr);
    1211                 :          0 :         trace(s, page, object, 0);
    1212                 :            :         /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
    1213                 :          0 :         init_object(s, object, SLUB_RED_INACTIVE);
    1214                 :            : 
    1215                 :            :         /* Reached end of constructed freelist yet? */
    1216                 :          0 :         if (object != tail) {
    1217                 :            :                 object = get_freepointer(s, object);
    1218                 :            :                 goto next_object;
    1219                 :            :         }
    1220                 :            :         ret = 1;
    1221                 :            : 
    1222                 :            : out:
    1223                 :          0 :         if (cnt != bulk_cnt)
    1224                 :          0 :                 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
    1225                 :            :                          bulk_cnt, cnt);
    1226                 :            : 
    1227                 :            :         slab_unlock(page);
    1228                 :            :         spin_unlock_irqrestore(&n->list_lock, flags);
    1229                 :          0 :         if (!ret)
    1230                 :          0 :                 slab_fix(s, "Object at 0x%p not freed", object);
    1231                 :          0 :         return ret;
    1232                 :            : }
    1233                 :            : 
    1234                 :          0 : static int __init setup_slub_debug(char *str)
    1235                 :            : {
    1236                 :          0 :         slub_debug = DEBUG_DEFAULT_FLAGS;
    1237                 :          0 :         if (*str++ != '=' || !*str)
    1238                 :            :                 /*
    1239                 :            :                  * No options specified. Switch on full debugging.
    1240                 :            :                  */
    1241                 :            :                 goto out;
    1242                 :            : 
    1243                 :          0 :         if (*str == ',')
    1244                 :            :                 /*
    1245                 :            :                  * No options but restriction on slabs. This means full
    1246                 :            :                  * debugging for slabs matching a pattern.
    1247                 :            :                  */
    1248                 :            :                 goto check_slabs;
    1249                 :            : 
    1250                 :          0 :         slub_debug = 0;
    1251                 :          0 :         if (*str == '-')
    1252                 :            :                 /*
    1253                 :            :                  * Switch off all debugging measures.
    1254                 :            :                  */
    1255                 :            :                 goto out;
    1256                 :            : 
    1257                 :            :         /*
    1258                 :            :          * Determine which debug features should be switched on
    1259                 :            :          */
    1260                 :          0 :         for (; *str && *str != ','; str++) {
    1261                 :          0 :                 switch (tolower(*str)) {
    1262                 :            :                 case 'f':
    1263                 :          0 :                         slub_debug |= SLAB_CONSISTENCY_CHECKS;
    1264                 :          0 :                         break;
    1265                 :            :                 case 'z':
    1266                 :          0 :                         slub_debug |= SLAB_RED_ZONE;
    1267                 :          0 :                         break;
    1268                 :            :                 case 'p':
    1269                 :          0 :                         slub_debug |= SLAB_POISON;
    1270                 :          0 :                         break;
    1271                 :            :                 case 'u':
    1272                 :          0 :                         slub_debug |= SLAB_STORE_USER;
    1273                 :          0 :                         break;
    1274                 :            :                 case 't':
    1275                 :          0 :                         slub_debug |= SLAB_TRACE;
    1276                 :          0 :                         break;
    1277                 :            :                 case 'a':
    1278                 :            :                         slub_debug |= SLAB_FAILSLAB;
    1279                 :            :                         break;
    1280                 :            :                 case 'o':
    1281                 :            :                         /*
    1282                 :            :                          * Avoid enabling debugging on caches if its minimum
    1283                 :            :                          * order would increase as a result.
    1284                 :            :                          */
    1285                 :          0 :                         disable_higher_order_debug = 1;
    1286                 :          0 :                         break;
    1287                 :            :                 default:
    1288                 :          0 :                         pr_err("slub_debug option '%c' unknown. skipped\n",
    1289                 :            :                                *str);
    1290                 :            :                 }
    1291                 :            :         }
    1292                 :            : 
    1293                 :            : check_slabs:
    1294                 :          0 :         if (*str == ',')
    1295                 :          0 :                 slub_debug_slabs = str + 1;
    1296                 :            : out:
    1297                 :          0 :         if ((static_branch_unlikely(&init_on_alloc) ||
    1298                 :          0 :              static_branch_unlikely(&init_on_free)) &&
    1299                 :          0 :             (slub_debug & SLAB_POISON))
    1300                 :          0 :                 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
    1301                 :          0 :         return 1;
    1302                 :            : }
    1303                 :            : 
    1304                 :            : __setup("slub_debug", setup_slub_debug);
    1305                 :            : 
    1306                 :            : /*
    1307                 :            :  * kmem_cache_flags - apply debugging options to the cache
    1308                 :            :  * @object_size:        the size of an object without meta data
    1309                 :            :  * @flags:              flags to set
    1310                 :            :  * @name:               name of the cache
    1311                 :            :  * @ctor:               constructor function
    1312                 :            :  *
    1313                 :            :  * Debug option(s) are applied to @flags. In addition to the debug
    1314                 :            :  * option(s), if a slab name (or multiple) is specified i.e.
    1315                 :            :  * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
    1316                 :            :  * then only the select slabs will receive the debug option(s).
    1317                 :            :  */
    1318                 :          3 : slab_flags_t kmem_cache_flags(unsigned int object_size,
    1319                 :            :         slab_flags_t flags, const char *name,
    1320                 :            :         void (*ctor)(void *))
    1321                 :            : {
    1322                 :            :         char *iter;
    1323                 :            :         size_t len;
    1324                 :            : 
    1325                 :            :         /* If slub_debug = 0, it folds into the if conditional. */
    1326                 :          3 :         if (!slub_debug_slabs)
    1327                 :          3 :                 return flags | slub_debug;
    1328                 :            : 
    1329                 :          0 :         len = strlen(name);
    1330                 :            :         iter = slub_debug_slabs;
    1331                 :          0 :         while (*iter) {
    1332                 :            :                 char *end, *glob;
    1333                 :            :                 size_t cmplen;
    1334                 :            : 
    1335                 :          0 :                 end = strchrnul(iter, ',');
    1336                 :            : 
    1337                 :          0 :                 glob = strnchr(iter, end - iter, '*');
    1338                 :          0 :                 if (glob)
    1339                 :          0 :                         cmplen = glob - iter;
    1340                 :            :                 else
    1341                 :          0 :                         cmplen = max_t(size_t, len, (end - iter));
    1342                 :            : 
    1343                 :          0 :                 if (!strncmp(name, iter, cmplen)) {
    1344                 :          0 :                         flags |= slub_debug;
    1345                 :          0 :                         break;
    1346                 :            :                 }
    1347                 :            : 
    1348                 :          0 :                 if (!*end)
    1349                 :            :                         break;
    1350                 :          0 :                 iter = end + 1;
    1351                 :            :         }
    1352                 :            : 
    1353                 :          0 :         return flags;
    1354                 :            : }
    1355                 :            : #else /* !CONFIG_SLUB_DEBUG */
    1356                 :            : static inline void setup_object_debug(struct kmem_cache *s,
    1357                 :            :                         struct page *page, void *object) {}
    1358                 :            : static inline
    1359                 :            : void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
    1360                 :            : 
    1361                 :            : static inline int alloc_debug_processing(struct kmem_cache *s,
    1362                 :            :         struct page *page, void *object, unsigned long addr) { return 0; }
    1363                 :            : 
    1364                 :            : static inline int free_debug_processing(
    1365                 :            :         struct kmem_cache *s, struct page *page,
    1366                 :            :         void *head, void *tail, int bulk_cnt,
    1367                 :            :         unsigned long addr) { return 0; }
    1368                 :            : 
    1369                 :            : static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
    1370                 :            :                         { return 1; }
    1371                 :            : static inline int check_object(struct kmem_cache *s, struct page *page,
    1372                 :            :                         void *object, u8 val) { return 1; }
    1373                 :            : static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
    1374                 :            :                                         struct page *page) {}
    1375                 :            : static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
    1376                 :            :                                         struct page *page) {}
    1377                 :            : slab_flags_t kmem_cache_flags(unsigned int object_size,
    1378                 :            :         slab_flags_t flags, const char *name,
    1379                 :            :         void (*ctor)(void *))
    1380                 :            : {
    1381                 :            :         return flags;
    1382                 :            : }
    /* Without CONFIG_SLUB_DEBUG both of these are the constant 0. */
    #define slub_debug 0

    #define disable_higher_order_debug 0

    /* Slab counter stubs: the queries return 0 and the updates do nothing. */
    static inline unsigned long slabs_node(struct kmem_cache *s, int node)
    {
            return 0;
    }

    static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
    {
            return 0;
    }

    static inline void inc_slabs_node(struct kmem_cache *s, int node,
                                      int objects)
    {
    }

    static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                      int objects)
    {
    }
    1395                 :            : 
    /*
     * Stub for builds without CONFIG_SLUB_DEBUG: always reports that the
     * freelist is not corrupted.
     */
    static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
                                   void *freelist, void *nextfree)
    {
            return false;
    }
    1401                 :            : #endif /* CONFIG_SLUB_DEBUG */
    1402                 :            : 
    1403                 :            : /*
    1404                 :            :  * Hooks for other subsystems that check memory allocations. In a typical
    1405                 :            :  * production configuration these hooks all should produce no code at all.
    1406                 :            :  */
    1407                 :            : static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
    1408                 :            : {
    1409                 :            :         ptr = kasan_kmalloc_large(ptr, size, flags);
    1410                 :            :         /* As ptr might get tagged, call kmemleak hook after KASAN. */
    1411                 :            :         kmemleak_alloc(ptr, size, 1, flags);
    1412                 :            :         return ptr;
    1413                 :            : }
    1414                 :            : 
    1415                 :            : static __always_inline void kfree_hook(void *x)
    1416                 :            : {
    1417                 :            :         kmemleak_free(x);
    1418                 :          3 :         kasan_kfree_large(x, _RET_IP_);
    1419                 :            : }
    1420                 :            : 
    1421                 :            : static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
    1422                 :            : {
    1423                 :            :         kmemleak_free_recursive(x, s->flags);
    1424                 :            : 
    1425                 :            :         /*
    1426                 :            :          * Trouble is that we may no longer disable interrupts in the fast path
    1427                 :            :          * So in order to make the debug calls that expect irqs to be
    1428                 :            :          * disabled we need to disable interrupts temporarily.
    1429                 :            :          */
    1430                 :            : #ifdef CONFIG_LOCKDEP
    1431                 :            :         {
    1432                 :            :                 unsigned long flags;
    1433                 :            : 
    1434                 :            :                 local_irq_save(flags);
    1435                 :            :                 debug_check_no_locks_freed(x, s->object_size);
    1436                 :            :                 local_irq_restore(flags);
    1437                 :            :         }
    1438                 :            : #endif
    1439                 :            :         if (!(s->flags & SLAB_DEBUG_OBJECTS))
    1440                 :            :                 debug_check_no_obj_freed(x, s->object_size);
    1441                 :            : 
    1442                 :            :         /* KASAN might put x into memory quarantine, delaying its reuse */
    1443                 :          3 :         return kasan_slab_free(s, x, _RET_IP_);
    1444                 :            : }
    1445                 :            : 
    1446                 :          3 : static inline bool slab_free_freelist_hook(struct kmem_cache *s,
    1447                 :            :                                            void **head, void **tail)
    1448                 :            : {
    1449                 :            : 
    1450                 :            :         void *object;
    1451                 :          3 :         void *next = *head;
    1452                 :          3 :         void *old_tail = *tail ? *tail : *head;
    1453                 :            :         int rsize;
    1454                 :            : 
    1455                 :            :         /* Head and tail of the reconstructed freelist */
    1456                 :          3 :         *head = NULL;
    1457                 :          3 :         *tail = NULL;
    1458                 :            : 
    1459                 :            :         do {
    1460                 :            :                 object = next;
    1461                 :            :                 next = get_freepointer(s, object);
    1462                 :            : 
    1463                 :          3 :                 if (slab_want_init_on_free(s)) {
    1464                 :            :                         /*
    1465                 :            :                          * Clear the object and the metadata, but don't touch
    1466                 :            :                          * the redzone.
    1467                 :            :                          */
    1468                 :          0 :                         memset(object, 0, s->object_size);
    1469                 :          0 :                         rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
    1470                 :            :                                                            : 0;
    1471                 :          0 :                         memset((char *)object + s->inuse, 0,
    1472                 :          0 :                                s->size - s->inuse - rsize);
    1473                 :            : 
    1474                 :            :                 }
    1475                 :            :                 /* If object's reuse doesn't have to be delayed */
    1476                 :          3 :                 if (!slab_free_hook(s, object)) {
    1477                 :            :                         /* Move object to the new freelist */
    1478                 :          3 :                         set_freepointer(s, object, *head);
    1479                 :          3 :                         *head = object;
    1480                 :          3 :                         if (!*tail)
    1481                 :          3 :                                 *tail = object;
    1482                 :            :                 }
    1483                 :          3 :         } while (object != old_tail);
    1484                 :            : 
    1485                 :          3 :         if (*head == *tail)
    1486                 :          3 :                 *tail = NULL;
    1487                 :            : 
    1488                 :          3 :         return *head != NULL;
    1489                 :            : }
    1490                 :            : 
    1491                 :          3 : static void *setup_object(struct kmem_cache *s, struct page *page,
    1492                 :            :                                 void *object)
    1493                 :            : {
    1494                 :          3 :         setup_object_debug(s, page, object);
    1495                 :            :         object = kasan_init_slab_obj(s, object);
    1496                 :          3 :         if (unlikely(s->ctor)) {
    1497                 :            :                 kasan_unpoison_object_data(s, object);
    1498                 :          3 :                 s->ctor(object);
    1499                 :            :                 kasan_poison_object_data(s, object);
    1500                 :            :         }
    1501                 :          3 :         return object;
    1502                 :            : }
    1503                 :            : 
    1504                 :            : /*
    1505                 :            :  * Slab allocation and freeing
    1506                 :            :  */
    1507                 :          3 : static inline struct page *alloc_slab_page(struct kmem_cache *s,
    1508                 :            :                 gfp_t flags, int node, struct kmem_cache_order_objects oo)
    1509                 :            : {
    1510                 :            :         struct page *page;
    1511                 :            :         unsigned int order = oo_order(oo);
    1512                 :            : 
    1513                 :          3 :         if (node == NUMA_NO_NODE)
    1514                 :            :                 page = alloc_pages(flags, order);
    1515                 :            :         else
    1516                 :            :                 page = __alloc_pages_node(node, flags, order);
    1517                 :            : 
    1518                 :          3 :         if (page && charge_slab_page(page, flags, order, s)) {
    1519                 :          0 :                 __free_pages(page, order);
    1520                 :            :                 page = NULL;
    1521                 :            :         }
    1522                 :            : 
    1523                 :          3 :         return page;
    1524                 :            : }
    1525                 :            : 
    1526                 :            : #ifdef CONFIG_SLAB_FREELIST_RANDOM
    1527                 :            : /* Pre-initialize the random sequence cache */
    1528                 :            : static int init_cache_random_seq(struct kmem_cache *s)
    1529                 :            : {
    1530                 :            :         unsigned int count = oo_objects(s->oo);
    1531                 :            :         int err;
    1532                 :            : 
    1533                 :            :         /* Bailout if already initialised */
    1534                 :            :         if (s->random_seq)
    1535                 :            :                 return 0;
    1536                 :            : 
    1537                 :            :         err = cache_random_seq_create(s, count, GFP_KERNEL);
    1538                 :            :         if (err) {
    1539                 :            :                 pr_err("SLUB: Unable to initialize free list for %s\n",
    1540                 :            :                         s->name);
    1541                 :            :                 return err;
    1542                 :            :         }
    1543                 :            : 
    1544                 :            :         /* Transform to an offset on the set of pages */
    1545                 :            :         if (s->random_seq) {
    1546                 :            :                 unsigned int i;
    1547                 :            : 
    1548                 :            :                 for (i = 0; i < count; i++)
    1549                 :            :                         s->random_seq[i] *= s->size;
    1550                 :            :         }
    1551                 :            :         return 0;
    1552                 :            : }
    1553                 :            : 
    1554                 :            : /* Initialize each random sequence freelist per cache */
    1555                 :            : static void __init init_freelist_randomization(void)
    1556                 :            : {
    1557                 :            :         struct kmem_cache *s;
    1558                 :            : 
    1559                 :            :         mutex_lock(&slab_mutex);
    1560                 :            : 
    1561                 :            :         list_for_each_entry(s, &slab_caches, list)
    1562                 :            :                 init_cache_random_seq(s);
    1563                 :            : 
    1564                 :            :         mutex_unlock(&slab_mutex);
    1565                 :            : }
    1566                 :            : 
    1567                 :            : /* Get the next entry on the pre-computed freelist randomized */
    1568                 :            : static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
    1569                 :            :                                 unsigned long *pos, void *start,
    1570                 :            :                                 unsigned long page_limit,
    1571                 :            :                                 unsigned long freelist_count)
    1572                 :            : {
    1573                 :            :         unsigned int idx;
    1574                 :            : 
    1575                 :            :         /*
    1576                 :            :          * If the target page allocation failed, the number of objects on the
    1577                 :            :          * page might be smaller than the usual size defined by the cache.
    1578                 :            :          */
    1579                 :            :         do {
    1580                 :            :                 idx = s->random_seq[*pos];
    1581                 :            :                 *pos += 1;
    1582                 :            :                 if (*pos >= freelist_count)
    1583                 :            :                         *pos = 0;
    1584                 :            :         } while (unlikely(idx >= page_limit));
    1585                 :            : 
    1586                 :            :         return (char *)start + idx;
    1587                 :            : }
    1588                 :            : 
    1589                 :            : /* Shuffle the single linked freelist based on a random pre-computed sequence */
    1590                 :            : static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
    1591                 :            : {
    1592                 :            :         void *start;
    1593                 :            :         void *cur;
    1594                 :            :         void *next;
    1595                 :            :         unsigned long idx, pos, page_limit, freelist_count;
    1596                 :            : 
    1597                 :            :         if (page->objects < 2 || !s->random_seq)
    1598                 :            :                 return false;
    1599                 :            : 
    1600                 :            :         freelist_count = oo_objects(s->oo);
    1601                 :            :         pos = get_random_int() % freelist_count;
    1602                 :            : 
    1603                 :            :         page_limit = page->objects * s->size;
    1604                 :            :         start = fixup_red_left(s, page_address(page));
    1605                 :            : 
    1606                 :            :         /* First entry is used as the base of the freelist */
    1607                 :            :         cur = next_freelist_entry(s, page, &pos, start, page_limit,
    1608                 :            :                                 freelist_count);
    1609                 :            :         cur = setup_object(s, page, cur);
    1610                 :            :         page->freelist = cur;
    1611                 :            : 
    1612                 :            :         for (idx = 1; idx < page->objects; idx++) {
    1613                 :            :                 next = next_freelist_entry(s, page, &pos, start, page_limit,
    1614                 :            :                         freelist_count);
    1615                 :            :                 next = setup_object(s, page, next);
    1616                 :            :                 set_freepointer(s, cur, next);
    1617                 :            :                 cur = next;
    1618                 :            :         }
    1619                 :            :         set_freepointer(s, cur, NULL);
    1620                 :            : 
    1621                 :            :         return true;
    1622                 :            : }
    1623                 :            : #else
    /*
     * Stubs for builds without CONFIG_SLAB_FREELIST_RANDOM: there is no
     * sequence to set up and no freelist is ever shuffled.
     */
    static inline int init_cache_random_seq(struct kmem_cache *s)
    {
            return 0;
    }

    static inline void init_freelist_randomization(void)
    {
    }

    static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
    {
            return false;
    }
    1633                 :            : #endif /* CONFIG_SLAB_FREELIST_RANDOM */
    1634                 :            : 
    1635                 :          3 : static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
    1636                 :            : {
    1637                 :            :         struct page *page;
    1638                 :          3 :         struct kmem_cache_order_objects oo = s->oo;
    1639                 :            :         gfp_t alloc_gfp;
    1640                 :            :         void *start, *p, *next;
    1641                 :            :         int idx;
    1642                 :            :         bool shuffle;
    1643                 :            : 
    1644                 :          3 :         flags &= gfp_allowed_mask;
    1645                 :            : 
    1646                 :          3 :         if (gfpflags_allow_blocking(flags))
    1647                 :          3 :                 local_irq_enable();
    1648                 :            : 
    1649                 :          3 :         flags |= s->allocflags;
    1650                 :            : 
    1651                 :            :         /*
    1652                 :            :          * Let the initial higher-order allocation fail under memory pressure
    1653                 :            :          * so we fall-back to the minimum order allocation.
    1654                 :            :          */
    1655                 :          3 :         alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
    1656                 :          3 :         if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
    1657                 :          3 :                 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
    1658                 :            : 
    1659                 :          3 :         page = alloc_slab_page(s, alloc_gfp, node, oo);
    1660                 :          3 :         if (unlikely(!page)) {
    1661                 :          0 :                 oo = s->min;
    1662                 :            :                 alloc_gfp = flags;
    1663                 :            :                 /*
    1664                 :            :                  * Allocation may have failed due to fragmentation.
    1665                 :            :                  * Try a lower order alloc if possible
    1666                 :            :                  */
    1667                 :          0 :                 page = alloc_slab_page(s, alloc_gfp, node, oo);
    1668                 :          0 :                 if (unlikely(!page))
    1669                 :            :                         goto out;
    1670                 :            :                 stat(s, ORDER_FALLBACK);
    1671                 :            :         }
    1672                 :            : 
    1673                 :          3 :         page->objects = oo_objects(oo);
    1674                 :            : 
    1675                 :          3 :         page->slab_cache = s;
    1676                 :            :         __SetPageSlab(page);
    1677                 :          3 :         if (page_is_pfmemalloc(page))
    1678                 :          0 :                 SetPageSlabPfmemalloc(page);
    1679                 :            : 
    1680                 :            :         kasan_poison_slab(page);
    1681                 :            : 
    1682                 :            :         start = page_address(page);
    1683                 :            : 
    1684                 :          3 :         setup_page_debug(s, page, start);
    1685                 :            : 
    1686                 :            :         shuffle = shuffle_freelist(s, page);
    1687                 :            : 
    1688                 :            :         if (!shuffle) {
    1689                 :            :                 start = fixup_red_left(s, start);
    1690                 :          3 :                 start = setup_object(s, page, start);
    1691                 :          3 :                 page->freelist = start;
    1692                 :          3 :                 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
    1693                 :          3 :                         next = p + s->size;
    1694                 :          3 :                         next = setup_object(s, page, next);
    1695                 :            :                         set_freepointer(s, p, next);
    1696                 :            :                         p = next;
    1697                 :            :                 }
    1698                 :            :                 set_freepointer(s, p, NULL);
    1699                 :            :         }
    1700                 :            : 
    1701                 :          3 :         page->inuse = page->objects;
    1702                 :          3 :         page->frozen = 1;
    1703                 :            : 
    1704                 :            : out:
    1705                 :          3 :         if (gfpflags_allow_blocking(flags))
    1706                 :          3 :                 local_irq_disable();
    1707                 :          3 :         if (!page)
    1708                 :            :                 return NULL;
    1709                 :            : 
    1710                 :          3 :         inc_slabs_node(s, page_to_nid(page), page->objects);
    1711                 :            : 
    1712                 :          3 :         return page;
    1713                 :            : }
    1714                 :            : 
    1715                 :          3 : static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
    1716                 :            : {
    1717                 :          3 :         if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
    1718                 :          0 :                 gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
    1719                 :          0 :                 flags &= ~GFP_SLAB_BUG_MASK;
    1720                 :          0 :                 pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
    1721                 :            :                                 invalid_mask, &invalid_mask, flags, &flags);
    1722                 :          0 :                 dump_stack();
    1723                 :            :         }
    1724                 :            : 
    1725                 :          3 :         return allocate_slab(s,
    1726                 :            :                 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
    1727                 :            : }
    1728                 :            : 
    1729                 :          3 : static void __free_slab(struct kmem_cache *s, struct page *page)
    1730                 :            : {
    1731                 :          3 :         int order = compound_order(page);
    1732                 :          3 :         int pages = 1 << order;
    1733                 :            : 
    1734                 :          3 :         if (s->flags & SLAB_CONSISTENCY_CHECKS) {
    1735                 :            :                 void *p;
    1736                 :            : 
    1737                 :          0 :                 slab_pad_check(s, page);
    1738                 :          0 :                 for_each_object(p, s, page_address(page),
    1739                 :            :                                                 page->objects)
    1740                 :          0 :                         check_object(s, page, p, SLUB_RED_INACTIVE);
    1741                 :            :         }
    1742                 :            : 
    1743                 :          3 :         __ClearPageSlabPfmemalloc(page);
    1744                 :            :         __ClearPageSlab(page);
    1745                 :            : 
    1746                 :          3 :         page->mapping = NULL;
    1747                 :          3 :         if (current->reclaim_state)
    1748                 :          0 :                 current->reclaim_state->reclaimed_slab += pages;
    1749                 :            :         uncharge_slab_page(page, order, s);
    1750                 :          3 :         __free_pages(page, order);
    1751                 :          3 : }
    1752                 :            : 
    1753                 :          3 : static void rcu_free_slab(struct rcu_head *h)
    1754                 :            : {
    1755                 :          3 :         struct page *page = container_of(h, struct page, rcu_head);
    1756                 :            : 
    1757                 :          3 :         __free_slab(page->slab_cache, page);
    1758                 :          3 : }
    1759                 :            : 
    1760                 :          3 : static void free_slab(struct kmem_cache *s, struct page *page)
    1761                 :            : {
    1762                 :          3 :         if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
    1763                 :          3 :                 call_rcu(&page->rcu_head, rcu_free_slab);
    1764                 :            :         } else
    1765                 :          3 :                 __free_slab(s, page);
    1766                 :          3 : }
    1767                 :            : 
    1768                 :          3 : static void discard_slab(struct kmem_cache *s, struct page *page)
    1769                 :            : {
    1770                 :          3 :         dec_slabs_node(s, page_to_nid(page), page->objects);
    1771                 :          3 :         free_slab(s, page);
    1772                 :          3 : }
    1773                 :            : 
    1774                 :            : /*
    1775                 :            :  * Management of partially allocated slabs.
    1776                 :            :  */
    1777                 :            : static inline void
    1778                 :            : __add_partial(struct kmem_cache_node *n, struct page *page, int tail)
    1779                 :            : {
    1780                 :          3 :         n->nr_partial++;
    1781                 :          3 :         if (tail == DEACTIVATE_TO_TAIL)
    1782                 :          3 :                 list_add_tail(&page->slab_list, &n->partial);
    1783                 :            :         else
    1784                 :          3 :                 list_add(&page->slab_list, &n->partial);
    1785                 :            : }
    1786                 :            : 
    1787                 :            : static inline void add_partial(struct kmem_cache_node *n,
    1788                 :            :                                 struct page *page, int tail)
    1789                 :            : {
    1790                 :            :         lockdep_assert_held(&n->list_lock);
    1791                 :            :         __add_partial(n, page, tail);
    1792                 :            : }
    1793                 :            : 
    1794                 :            : static inline void remove_partial(struct kmem_cache_node *n,
    1795                 :            :                                         struct page *page)
    1796                 :            : {
    1797                 :            :         lockdep_assert_held(&n->list_lock);
    1798                 :            :         list_del(&page->slab_list);
    1799                 :          3 :         n->nr_partial--;
    1800                 :            : }
    1801                 :            : 
    1802                 :            : /*
    1803                 :            :  * Remove slab from the partial list, freeze it and
    1804                 :            :  * return the pointer to the freelist.
    1805                 :            :  *
    1806                 :            :  * Returns a list of objects or NULL if it fails.
    1807                 :            :  */
    1808                 :          3 : static inline void *acquire_slab(struct kmem_cache *s,
    1809                 :            :                 struct kmem_cache_node *n, struct page *page,
    1810                 :            :                 int mode, int *objects)
    1811                 :            : {
    1812                 :            :         void *freelist;
    1813                 :            :         unsigned long counters;
    1814                 :            :         struct page new;
    1815                 :            : 
    1816                 :            :         lockdep_assert_held(&n->list_lock);
    1817                 :            : 
    1818                 :            :         /*
    1819                 :            :          * Zap the freelist and set the frozen bit.
    1820                 :            :          * The old freelist is the list of objects for the
    1821                 :            :          * per cpu allocation list.
    1822                 :            :          */
    1823                 :          3 :         freelist = page->freelist;
    1824                 :          3 :         counters = page->counters;
    1825                 :          3 :         new.counters = counters;
    1826                 :          3 :         *objects = new.objects - new.inuse;
    1827                 :          3 :         if (mode) {
    1828                 :          3 :                 new.inuse = page->objects;
    1829                 :            :                 new.freelist = NULL;
    1830                 :            :         } else {
    1831                 :            :                 new.freelist = freelist;
    1832                 :            :         }
    1833                 :            : 
    1834                 :            :         VM_BUG_ON(new.frozen);
    1835                 :          3 :         new.frozen = 1;
    1836                 :            : 
    1837                 :          3 :         if (!__cmpxchg_double_slab(s, page,
    1838                 :            :                         freelist, counters,
    1839                 :            :                         new.freelist, new.counters,
    1840                 :            :                         "acquire_slab"))
    1841                 :            :                 return NULL;
    1842                 :            : 
    1843                 :            :         remove_partial(n, page);
    1844                 :          3 :         WARN_ON(!freelist);
    1845                 :          3 :         return freelist;
    1846                 :            : }
    1847                 :            : 
    1848                 :            : static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
    1849                 :            : static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
    1850                 :            : 
    1851                 :            : /*
    1852                 :            :  * Try to allocate a partial slab from a specific node.
    1853                 :            :  */
    1854                 :          3 : static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
    1855                 :            :                                 struct kmem_cache_cpu *c, gfp_t flags)
    1856                 :            : {
    1857                 :            :         struct page *page, *page2;
    1858                 :            :         void *object = NULL;
    1859                 :            :         unsigned int available = 0;
    1860                 :            :         int objects;
    1861                 :            : 
    1862                 :            :         /*
    1863                 :            :          * Racy check. If we mistakenly see no partial slabs then we
    1864                 :            :          * just allocate an empty slab. If we mistakenly try to get a
    1865                 :            :          * partial slab and there is none available then get_partials()
    1866                 :            :          * will return NULL.
    1867                 :            :          */
    1868                 :          3 :         if (!n || !n->nr_partial)
    1869                 :            :                 return NULL;
    1870                 :            : 
    1871                 :            :         spin_lock(&n->list_lock);
    1872                 :          3 :         list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
    1873                 :            :                 void *t;
    1874                 :            : 
    1875                 :          3 :                 if (!pfmemalloc_match(page, flags))
    1876                 :          0 :                         continue;
    1877                 :            : 
    1878                 :          3 :                 t = acquire_slab(s, n, page, object == NULL, &objects);
    1879                 :          3 :                 if (!t)
    1880                 :            :                         break;
    1881                 :            : 
    1882                 :          3 :                 available += objects;
    1883                 :          3 :                 if (!object) {
    1884                 :          3 :                         c->page = page;
    1885                 :            :                         stat(s, ALLOC_FROM_PARTIAL);
    1886                 :            :                         object = t;
    1887                 :            :                 } else {
    1888                 :          3 :                         put_cpu_partial(s, page, 0);
    1889                 :            :                         stat(s, CPU_PARTIAL_NODE);
    1890                 :            :                 }
    1891                 :          3 :                 if (!kmem_cache_has_cpu_partial(s)
    1892                 :          3 :                         || available > slub_cpu_partial(s) / 2)
    1893                 :            :                         break;
    1894                 :            : 
    1895                 :            :         }
    1896                 :            :         spin_unlock(&n->list_lock);
    1897                 :          3 :         return object;
    1898                 :            : }
    1899                 :            : 
    1900                 :            : /*
    1901                 :            :  * Get a page from somewhere. Search in increasing NUMA distances.
    1902                 :            :  */
    1903                 :            : static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
    1904                 :            :                 struct kmem_cache_cpu *c)
    1905                 :            : {
    1906                 :            : #ifdef CONFIG_NUMA
    1907                 :            :         struct zonelist *zonelist;
    1908                 :            :         struct zoneref *z;
    1909                 :            :         struct zone *zone;
    1910                 :            :         enum zone_type high_zoneidx = gfp_zone(flags);
    1911                 :            :         void *object;
    1912                 :            :         unsigned int cpuset_mems_cookie;
    1913                 :            : 
    1914                 :            :         /*
    1915                 :            :          * The defrag ratio allows a configuration of the tradeoffs between
    1916                 :            :          * inter node defragmentation and node local allocations. A lower
    1917                 :            :          * defrag_ratio increases the tendency to do local allocations
    1918                 :            :          * instead of attempting to obtain partial slabs from other nodes.
    1919                 :            :          *
    1920                 :            :          * If the defrag_ratio is set to 0 then kmalloc() always
    1921                 :            :          * returns node local objects. If the ratio is higher then kmalloc()
    1922                 :            :          * may return off node objects because partial slabs are obtained
    1923                 :            :          * from other nodes and filled up.
    1924                 :            :          *
    1925                 :            :          * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
    1926                 :            :          * (which makes defrag_ratio = 1000) then every (well almost)
    1927                 :            :          * allocation will first attempt to defrag slab caches on other nodes.
    1928                 :            :          * This means scanning over all nodes to look for partial slabs which
    1929                 :            :          * may be expensive if we do it every time we are trying to find a slab
    1930                 :            :          * with available objects.
    1931                 :            :          */
    1932                 :            :         if (!s->remote_node_defrag_ratio ||
    1933                 :            :                         get_cycles() % 1024 > s->remote_node_defrag_ratio)
    1934                 :            :                 return NULL;
    1935                 :            : 
    1936                 :            :         do {
    1937                 :            :                 cpuset_mems_cookie = read_mems_allowed_begin();
    1938                 :            :                 zonelist = node_zonelist(mempolicy_slab_node(), flags);
    1939                 :            :                 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
    1940                 :            :                         struct kmem_cache_node *n;
    1941                 :            : 
    1942                 :            :                         n = get_node(s, zone_to_nid(zone));
    1943                 :            : 
    1944                 :            :                         if (n && cpuset_zone_allowed(zone, flags) &&
    1945                 :            :                                         n->nr_partial > s->min_partial) {
    1946                 :            :                                 object = get_partial_node(s, n, c, flags);
    1947                 :            :                                 if (object) {
    1948                 :            :                                         /*
    1949                 :            :                                          * Don't check read_mems_allowed_retry()
    1950                 :            :                                          * here - if mems_allowed was updated in
    1951                 :            :                                          * parallel, that was a harmless race
    1952                 :            :                                          * between allocation and the cpuset
    1953                 :            :                                          * update
    1954                 :            :                                          */
    1955                 :            :                                         return object;
    1956                 :            :                                 }
    1957                 :            :                         }
    1958                 :            :                 }
    1959                 :            :         } while (read_mems_allowed_retry(cpuset_mems_cookie));
    1960                 :            : #endif  /* CONFIG_NUMA */
    1961                 :            :         return NULL;
    1962                 :            : }
    1963                 :            : 
    1964                 :            : /*
    1965                 :            :  * Get a partial page, lock it and return it.
    1966                 :            :  */
    1967                 :          3 : static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
    1968                 :            :                 struct kmem_cache_cpu *c)
    1969                 :            : {
    1970                 :            :         void *object;
    1971                 :            :         int searchnode = node;
    1972                 :            : 
    1973                 :          3 :         if (node == NUMA_NO_NODE)
    1974                 :            :                 searchnode = numa_mem_id();
    1975                 :            : 
    1976                 :          3 :         object = get_partial_node(s, get_node(s, searchnode), c, flags);
    1977                 :          3 :         if (object || node != NUMA_NO_NODE)
    1978                 :          3 :                 return object;
    1979                 :            : 
    1980                 :            :         return get_any_partial(s, flags, c);
    1981                 :            : }
    1982                 :            : 
    1983                 :            : #ifdef CONFIG_PREEMPT
    1984                 :            : /*
    1985                 :            :  * Calculate the next globally unique transaction for disambiguiation
    1986                 :            :  * during cmpxchg. The transactions start with the cpu number and are then
    1987                 :            :  * incremented by CONFIG_NR_CPUS.
    1988                 :            :  */
    1989                 :            : #define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
    1990                 :            : #else
    1991                 :            : /*
    1992                 :            :  * No preemption supported therefore also no need to check for
    1993                 :            :  * different cpus.
    1994                 :            :  */
    1995                 :            : #define TID_STEP 1
    1996                 :            : #endif
    1997                 :            : 
    1998                 :          3 : static inline unsigned long next_tid(unsigned long tid)
    1999                 :            : {
    2000                 :          3 :         return tid + TID_STEP;
    2001                 :            : }
    2002                 :            : 
    2003                 :            : #ifdef SLUB_DEBUG_CMPXCHG
    2004                 :            : static inline unsigned int tid_to_cpu(unsigned long tid)
    2005                 :            : {
    2006                 :            :         return tid % TID_STEP;
    2007                 :            : }
    2008                 :            : 
    2009                 :            : static inline unsigned long tid_to_event(unsigned long tid)
    2010                 :            : {
    2011                 :            :         return tid / TID_STEP;
    2012                 :            : }
    2013                 :            : #endif
    2014                 :            : 
    2015                 :            : static inline unsigned int init_tid(int cpu)
    2016                 :            : {
    2017                 :            :         return cpu;
    2018                 :            : }
    2019                 :            : 
    2020                 :            : static inline void note_cmpxchg_failure(const char *n,
    2021                 :            :                 const struct kmem_cache *s, unsigned long tid)
    2022                 :            : {
    2023                 :            : #ifdef SLUB_DEBUG_CMPXCHG
    2024                 :            :         unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
    2025                 :            : 
    2026                 :            :         pr_info("%s %s: cmpxchg redo ", n, s->name);
    2027                 :            : 
    2028                 :            : #ifdef CONFIG_PREEMPT
    2029                 :            :         if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
    2030                 :            :                 pr_warn("due to cpu change %d -> %d\n",
    2031                 :            :                         tid_to_cpu(tid), tid_to_cpu(actual_tid));
    2032                 :            :         else
    2033                 :            : #endif
    2034                 :            :         if (tid_to_event(tid) != tid_to_event(actual_tid))
    2035                 :            :                 pr_warn("due to cpu running other code. Event %ld->%ld\n",
    2036                 :            :                         tid_to_event(tid), tid_to_event(actual_tid));
    2037                 :            :         else
    2038                 :            :                 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
    2039                 :            :                         actual_tid, tid, next_tid(tid));
    2040                 :            : #endif
    2041                 :            :         stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
    2042                 :            : }
    2043                 :            : 
    2044                 :          3 : static void init_kmem_cache_cpus(struct kmem_cache *s)
    2045                 :            : {
    2046                 :            :         int cpu;
    2047                 :            : 
    2048                 :          3 :         for_each_possible_cpu(cpu)
    2049                 :          3 :                 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
    2050                 :          3 : }
    2051                 :            : 
    2052                 :            : /*
    2053                 :            :  * Remove the cpu slab
    2054                 :            :  */
    2055                 :          3 : static void deactivate_slab(struct kmem_cache *s, struct page *page,
    2056                 :            :                                 void *freelist, struct kmem_cache_cpu *c)
    2057                 :            : {
    2058                 :            :         enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
    2059                 :            :         struct kmem_cache_node *n = get_node(s, page_to_nid(page));
    2060                 :            :         int lock = 0;
    2061                 :            :         enum slab_modes l = M_NONE, m = M_NONE;
    2062                 :            :         void *nextfree;
    2063                 :            :         int tail = DEACTIVATE_TO_HEAD;
    2064                 :            :         struct page new;
    2065                 :            :         struct page old;
    2066                 :            : 
    2067                 :          3 :         if (page->freelist) {
    2068                 :            :                 stat(s, DEACTIVATE_REMOTE_FREES);
    2069                 :            :                 tail = DEACTIVATE_TO_TAIL;
    2070                 :            :         }
    2071                 :            : 
    2072                 :            :         /*
    2073                 :            :          * Stage one: Free all available per cpu objects back
    2074                 :            :          * to the page freelist while it is still frozen. Leave the
    2075                 :            :          * last one.
    2076                 :            :          *
    2077                 :            :          * There is no need to take the list->lock because the page
    2078                 :            :          * is still frozen.
    2079                 :            :          */
    2080                 :          3 :         while (freelist && (nextfree = get_freepointer(s, freelist))) {
    2081                 :            :                 void *prior;
    2082                 :            :                 unsigned long counters;
    2083                 :            : 
    2084                 :            :                 /*
    2085                 :            :                  * If 'nextfree' is invalid, it is possible that the object at
    2086                 :            :                  * 'freelist' is already corrupted.  So isolate all objects
    2087                 :            :                  * starting at 'freelist'.
    2088                 :            :                  */
    2089                 :          3 :                 if (freelist_corrupted(s, page, freelist, nextfree))
    2090                 :            :                         break;
    2091                 :            : 
    2092                 :            :                 do {
    2093                 :          3 :                         prior = page->freelist;
    2094                 :          3 :                         counters = page->counters;
    2095                 :            :                         set_freepointer(s, freelist, prior);
    2096                 :          3 :                         new.counters = counters;
    2097                 :          3 :                         new.inuse--;
    2098                 :            :                         VM_BUG_ON(!new.frozen);
    2099                 :            : 
    2100                 :          3 :                 } while (!__cmpxchg_double_slab(s, page,
    2101                 :            :                         prior, counters,
    2102                 :            :                         freelist, new.counters,
    2103                 :          3 :                         "drain percpu freelist"));
    2104                 :            : 
    2105                 :            :                 freelist = nextfree;
    2106                 :            :         }
    2107                 :            : 
    2108                 :            :         /*
    2109                 :            :          * Stage two: Ensure that the page is unfrozen while the
    2110                 :            :          * list presence reflects the actual number of objects
    2111                 :            :          * during unfreeze.
    2112                 :            :          *
    2113                 :            :          * We setup the list membership and then perform a cmpxchg
    2114                 :            :          * with the count. If there is a mismatch then the page
    2115                 :            :          * is not unfrozen but the page is on the wrong list.
    2116                 :            :          *
    2117                 :            :          * Then we restart the process which may have to remove
    2118                 :            :          * the page from the list that we just put it on again
    2119                 :            :          * because the number of objects in the slab may have
    2120                 :            :          * changed.
    2121                 :            :          */
    2122                 :            : redo:
    2123                 :            : 
    2124                 :          3 :         old.freelist = page->freelist;
    2125                 :          3 :         old.counters = page->counters;
    2126                 :            :         VM_BUG_ON(!old.frozen);
    2127                 :            : 
    2128                 :            :         /* Determine target state of the slab */
    2129                 :          3 :         new.counters = old.counters;
    2130                 :          3 :         if (freelist) {
    2131                 :          3 :                 new.inuse--;
    2132                 :            :                 set_freepointer(s, freelist, old.freelist);
    2133                 :            :                 new.freelist = freelist;
    2134                 :            :         } else
    2135                 :            :                 new.freelist = old.freelist;
    2136                 :            : 
    2137                 :          3 :         new.frozen = 0;
    2138                 :            : 
    2139                 :          3 :         if (!new.inuse && n->nr_partial >= s->min_partial)
    2140                 :            :                 m = M_FREE;
    2141                 :          3 :         else if (new.freelist) {
    2142                 :            :                 m = M_PARTIAL;
    2143                 :          3 :                 if (!lock) {
    2144                 :            :                         lock = 1;
    2145                 :            :                         /*
    2146                 :            :                          * Taking the spinlock removes the possibility
    2147                 :            :                          * that acquire_slab() will see a slab page that
    2148                 :            :                          * is frozen
    2149                 :            :                          */
    2150                 :            :                         spin_lock(&n->list_lock);
    2151                 :            :                 }
    2152                 :            :         } else {
    2153                 :            :                 m = M_FULL;
    2154                 :          3 :                 if (kmem_cache_debug(s) && !lock) {
    2155                 :            :                         lock = 1;
    2156                 :            :                         /*
    2157                 :            :                          * This also ensures that the scanning of full
    2158                 :            :                          * slabs from diagnostic functions will not see
    2159                 :            :                          * any frozen slabs.
    2160                 :            :                          */
    2161                 :            :                         spin_lock(&n->list_lock);
    2162                 :            :                 }
    2163                 :            :         }
    2164                 :            : 
    2165                 :          3 :         if (l != m) {
    2166                 :          3 :                 if (l == M_PARTIAL)
    2167                 :            :                         remove_partial(n, page);
    2168                 :          3 :                 else if (l == M_FULL)
    2169                 :            :                         remove_full(s, n, page);
    2170                 :            : 
    2171                 :          3 :                 if (m == M_PARTIAL)
    2172                 :            :                         add_partial(n, page, tail);
    2173                 :          3 :                 else if (m == M_FULL)
    2174                 :            :                         add_full(s, n, page);
    2175                 :            :         }
    2176                 :            : 
    2177                 :            :         l = m;
    2178                 :          3 :         if (!__cmpxchg_double_slab(s, page,
    2179                 :            :                                 old.freelist, old.counters,
    2180                 :            :                                 new.freelist, new.counters,
    2181                 :            :                                 "unfreezing slab"))
    2182                 :            :                 goto redo;
    2183                 :            : 
    2184                 :          3 :         if (lock)
    2185                 :            :                 spin_unlock(&n->list_lock);
    2186                 :            : 
    2187                 :          3 :         if (m == M_PARTIAL)
    2188                 :            :                 stat(s, tail);
    2189                 :          3 :         else if (m == M_FULL)
    2190                 :            :                 stat(s, DEACTIVATE_FULL);
    2191                 :          1 :         else if (m == M_FREE) {
    2192                 :            :                 stat(s, DEACTIVATE_EMPTY);
    2193                 :          1 :                 discard_slab(s, page);
    2194                 :            :                 stat(s, FREE_SLAB);
    2195                 :            :         }
    2196                 :            : 
    2197                 :          3 :         c->page = NULL;
    2198                 :          3 :         c->freelist = NULL;
    2199                 :          3 : }
    2200                 :            : 
    2201                 :            : /*
    2202                 :            :  * Unfreeze all the cpu partial slabs.
    2203                 :            :  *
    2204                 :            :  * This function must be called with interrupts disabled
    2205                 :            :  * for the cpu using c (or some other guarantee must be there
    2206                 :            :  * to guarantee no concurrent accesses).
    2207                 :            :  */
    2208                 :          3 : static void unfreeze_partials(struct kmem_cache *s,
    2209                 :            :                 struct kmem_cache_cpu *c)
    2210                 :            : {
    2211                 :            : #ifdef CONFIG_SLUB_CPU_PARTIAL
    2212                 :            :         struct kmem_cache_node *n = NULL, *n2 = NULL;
    2213                 :            :         struct page *page, *discard_page = NULL;
    2214                 :            : 
    2215                 :          3 :         while ((page = c->partial)) {
    2216                 :            :                 struct page new;
    2217                 :            :                 struct page old;
    2218                 :            : 
    2219                 :          3 :                 c->partial = page->next;
    2220                 :            : 
    2221                 :            :                 n2 = get_node(s, page_to_nid(page));
    2222                 :          3 :                 if (n != n2) {
    2223                 :          3 :                         if (n)
    2224                 :            :                                 spin_unlock(&n->list_lock);
    2225                 :            : 
    2226                 :            :                         n = n2;
    2227                 :            :                         spin_lock(&n->list_lock);
    2228                 :            :                 }
    2229                 :            : 
    2230                 :            :                 do {
    2231                 :            : 
    2232                 :          3 :                         old.freelist = page->freelist;
    2233                 :          3 :                         old.counters = page->counters;
    2234                 :            :                         VM_BUG_ON(!old.frozen);
    2235                 :            : 
    2236                 :          3 :                         new.counters = old.counters;
    2237                 :            :                         new.freelist = old.freelist;
    2238                 :            : 
    2239                 :          3 :                         new.frozen = 0;
    2240                 :            : 
    2241                 :          3 :                 } while (!__cmpxchg_double_slab(s, page,
    2242                 :            :                                 old.freelist, old.counters,
    2243                 :            :                                 new.freelist, new.counters,
    2244                 :          3 :                                 "unfreezing slab"));
    2245                 :            : 
    2246                 :          3 :                 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
    2247                 :          3 :                         page->next = discard_page;
    2248                 :          3 :                         discard_page = page;
    2249                 :            :                 } else {
    2250                 :            :                         add_partial(n, page, DEACTIVATE_TO_TAIL);
    2251                 :            :                         stat(s, FREE_ADD_PARTIAL);
    2252                 :            :                 }
    2253                 :            :         }
    2254                 :            : 
    2255                 :          3 :         if (n)
    2256                 :            :                 spin_unlock(&n->list_lock);
    2257                 :            : 
    2258                 :          3 :         while (discard_page) {
    2259                 :            :                 page = discard_page;
    2260                 :          3 :                 discard_page = discard_page->next;
    2261                 :            : 
    2262                 :            :                 stat(s, DEACTIVATE_EMPTY);
    2263                 :          3 :                 discard_slab(s, page);
    2264                 :            :                 stat(s, FREE_SLAB);
    2265                 :            :         }
    2266                 :            : #endif  /* CONFIG_SLUB_CPU_PARTIAL */
    2267                 :          3 : }
    2268                 :            : 
    2269                 :            : /*
    2270                 :            :  * Put a page that was just frozen (in __slab_free|get_partial_node) into a
    2271                 :            :  * partial page slot if available.
    2272                 :            :  *
    2273                 :            :  * If we did not find a slot then simply move all the partials to the
    2274                 :            :  * per node partial list.
    2275                 :            :  */
    2276                 :          3 : static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
    2277                 :            : {
    2278                 :            : #ifdef CONFIG_SLUB_CPU_PARTIAL
    2279                 :            :         struct page *oldpage;
    2280                 :            :         int pages;
    2281                 :            :         int pobjects;
    2282                 :            : 
    2283                 :          3 :         preempt_disable();
    2284                 :            :         do {
    2285                 :            :                 pages = 0;
    2286                 :            :                 pobjects = 0;
    2287                 :          3 :                 oldpage = this_cpu_read(s->cpu_slab->partial);
    2288                 :            : 
    2289                 :          3 :                 if (oldpage) {
    2290                 :          3 :                         pobjects = oldpage->pobjects;
    2291                 :          3 :                         pages = oldpage->pages;
    2292                 :          3 :                         if (drain && pobjects > s->cpu_partial) {
    2293                 :            :                                 unsigned long flags;
    2294                 :            :                                 /*
    2295                 :            :                                  * partial array is full. Move the existing
    2296                 :            :                                  * set to the per node partial list.
    2297                 :            :                                  */
    2298                 :          3 :                                 local_irq_save(flags);
    2299                 :          3 :                                 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
    2300                 :          3 :                                 local_irq_restore(flags);
    2301                 :            :                                 oldpage = NULL;
    2302                 :            :                                 pobjects = 0;
    2303                 :            :                                 pages = 0;
    2304                 :            :                                 stat(s, CPU_PARTIAL_DRAIN);
    2305                 :            :                         }
    2306                 :            :                 }
    2307                 :            : 
    2308                 :          3 :                 pages++;
    2309                 :          3 :                 pobjects += page->objects - page->inuse;
    2310                 :            : 
    2311                 :          3 :                 page->pages = pages;
    2312                 :          3 :                 page->pobjects = pobjects;
    2313                 :          3 :                 page->next = oldpage;
    2314                 :            : 
    2315                 :          3 :         } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
    2316                 :          3 :                                                                 != oldpage);
    2317                 :          3 :         if (unlikely(!s->cpu_partial)) {
    2318                 :            :                 unsigned long flags;
    2319                 :            : 
    2320                 :          0 :                 local_irq_save(flags);
    2321                 :          0 :                 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
    2322                 :          0 :                 local_irq_restore(flags);
    2323                 :            :         }
    2324                 :          3 :         preempt_enable();
    2325                 :            : #endif  /* CONFIG_SLUB_CPU_PARTIAL */
    2326                 :          3 : }
    2327                 :            : 
    2328                 :            : static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
    2329                 :            : {
    2330                 :            :         stat(s, CPUSLAB_FLUSH);
    2331                 :          3 :         deactivate_slab(s, c->page, c->freelist, c);
    2332                 :            : 
    2333                 :          3 :         c->tid = next_tid(c->tid);
    2334                 :            : }
    2335                 :            : 
    2336                 :            : /*
    2337                 :            :  * Flush cpu slab.
    2338                 :            :  *
    2339                 :            :  * Called from IPI handler with interrupts disabled.
    2340                 :            :  */
    2341                 :          3 : static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
    2342                 :            : {
    2343                 :          3 :         struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
    2344                 :            : 
    2345                 :          3 :         if (c->page)
    2346                 :            :                 flush_slab(s, c);
    2347                 :            : 
    2348                 :          3 :         unfreeze_partials(s, c);
    2349                 :          3 : }
    2350                 :            : 
    2351                 :          0 : static void flush_cpu_slab(void *d)
    2352                 :            : {
    2353                 :            :         struct kmem_cache *s = d;
    2354                 :            : 
    2355                 :          0 :         __flush_cpu_slab(s, smp_processor_id());
    2356                 :          0 : }
    2357                 :            : 
    2358                 :          0 : static bool has_cpu_slab(int cpu, void *info)
    2359                 :            : {
    2360                 :            :         struct kmem_cache *s = info;
    2361                 :          0 :         struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
    2362                 :            : 
    2363                 :          0 :         return c->page || slub_percpu_partial(c);
    2364                 :            : }
    2365                 :            : 
    2366                 :            : static void flush_all(struct kmem_cache *s)
    2367                 :            : {
    2368                 :          0 :         on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
    2369                 :            : }
    2370                 :            : 
    2371                 :            : /*
    2372                 :            :  * Use the cpu notifier to ensure that the cpu slabs are flushed when
    2373                 :            :  * necessary.
    2374                 :            :  */
    2375                 :          0 : static int slub_cpu_dead(unsigned int cpu)
    2376                 :            : {
    2377                 :            :         struct kmem_cache *s;
    2378                 :            :         unsigned long flags;
    2379                 :            : 
    2380                 :          0 :         mutex_lock(&slab_mutex);
    2381                 :          0 :         list_for_each_entry(s, &slab_caches, list) {
    2382                 :          0 :                 local_irq_save(flags);
    2383                 :          0 :                 __flush_cpu_slab(s, cpu);
    2384                 :          0 :                 local_irq_restore(flags);
    2385                 :            :         }
    2386                 :          0 :         mutex_unlock(&slab_mutex);
    2387                 :          0 :         return 0;
    2388                 :            : }
    2389                 :            : 
    2390                 :            : /*
    2391                 :            :  * Check if the objects in a per cpu structure fit numa
    2392                 :            :  * locality expectations.
    2393                 :            :  */
    2394                 :          3 : static inline int node_match(struct page *page, int node)
    2395                 :            : {
    2396                 :            : #ifdef CONFIG_NUMA
    2397                 :            :         if (node != NUMA_NO_NODE && page_to_nid(page) != node)
    2398                 :            :                 return 0;
    2399                 :            : #endif
    2400                 :          3 :         return 1;
    2401                 :            : }
    2402                 :            : 
    2403                 :            : #ifdef CONFIG_SLUB_DEBUG
    2404                 :          0 : static int count_free(struct page *page)
    2405                 :            : {
    2406                 :          0 :         return page->objects - page->inuse;
    2407                 :            : }
    2408                 :            : 
    2409                 :            : static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
    2410                 :            : {
    2411                 :            :         return atomic_long_read(&n->total_objects);
    2412                 :            : }
    2413                 :            : #endif /* CONFIG_SLUB_DEBUG */
    2414                 :            : 
    2415                 :            : #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
    2416                 :          0 : static unsigned long count_partial(struct kmem_cache_node *n,
    2417                 :            :                                         int (*get_count)(struct page *))
    2418                 :            : {
    2419                 :            :         unsigned long flags;
    2420                 :            :         unsigned long x = 0;
    2421                 :            :         struct page *page;
    2422                 :            : 
    2423                 :          0 :         spin_lock_irqsave(&n->list_lock, flags);
    2424                 :          0 :         list_for_each_entry(page, &n->partial, slab_list)
    2425                 :          0 :                 x += get_count(page);
    2426                 :            :         spin_unlock_irqrestore(&n->list_lock, flags);
    2427                 :          0 :         return x;
    2428                 :            : }
    2429                 :            : #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
    2430                 :            : 
    2431                 :            : static noinline void
    2432                 :          0 : slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
    2433                 :            : {
    2434                 :            : #ifdef CONFIG_SLUB_DEBUG
    2435                 :            :         static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
    2436                 :            :                                       DEFAULT_RATELIMIT_BURST);
    2437                 :            :         int node;
    2438                 :            :         struct kmem_cache_node *n;
    2439                 :            : 
    2440                 :          0 :         if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
    2441                 :          0 :                 return;
    2442                 :            : 
    2443                 :          0 :         pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
    2444                 :            :                 nid, gfpflags, &gfpflags);
    2445                 :          0 :         pr_warn("  cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
    2446                 :            :                 s->name, s->object_size, s->size, oo_order(s->oo),
    2447                 :            :                 oo_order(s->min));
    2448                 :            : 
    2449                 :          0 :         if (oo_order(s->min) > get_order(s->object_size))
    2450                 :          0 :                 pr_warn("  %s debugging increased min order, use slub_debug=O to disable.\n",
    2451                 :            :                         s->name);
    2452                 :            : 
    2453                 :          0 :         for_each_kmem_cache_node(s, node, n) {
    2454                 :            :                 unsigned long nr_slabs;
    2455                 :            :                 unsigned long nr_objs;
    2456                 :            :                 unsigned long nr_free;
    2457                 :            : 
    2458                 :          0 :                 nr_free  = count_partial(n, count_free);
    2459                 :            :                 nr_slabs = node_nr_slabs(n);
    2460                 :            :                 nr_objs  = node_nr_objs(n);
    2461                 :            : 
    2462                 :          0 :                 pr_warn("  node %d: slabs: %ld, objs: %ld, free: %ld\n",
    2463                 :            :                         node, nr_slabs, nr_objs, nr_free);
    2464                 :            :         }
    2465                 :            : #endif
    2466                 :            : }
    2467                 :            : 
    2468                 :          3 : static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
    2469                 :            :                         int node, struct kmem_cache_cpu **pc)
    2470                 :            : {
    2471                 :            :         void *freelist;
    2472                 :          3 :         struct kmem_cache_cpu *c = *pc;
    2473                 :            :         struct page *page;
    2474                 :            : 
    2475                 :          3 :         WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
    2476                 :            : 
    2477                 :          3 :         freelist = get_partial(s, flags, node, c);
    2478                 :            : 
    2479                 :          3 :         if (freelist)
    2480                 :            :                 return freelist;
    2481                 :            : 
    2482                 :          3 :         page = new_slab(s, flags, node);
    2483                 :          3 :         if (page) {
    2484                 :          3 :                 c = raw_cpu_ptr(s->cpu_slab);
    2485                 :          3 :                 if (c->page)
    2486                 :            :                         flush_slab(s, c);
    2487                 :            : 
    2488                 :            :                 /*
    2489                 :            :                  * No other reference to the page yet so we can
    2490                 :            :                  * muck around with it freely without cmpxchg
    2491                 :            :                  */
    2492                 :          3 :                 freelist = page->freelist;
    2493                 :          3 :                 page->freelist = NULL;
    2494                 :            : 
    2495                 :            :                 stat(s, ALLOC_SLAB);
    2496                 :          3 :                 c->page = page;
    2497                 :          3 :                 *pc = c;
    2498                 :            :         }
    2499                 :            : 
    2500                 :          3 :         return freelist;
    2501                 :            : }
    2502                 :            : 
    2503                 :          3 : static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
    2504                 :            : {
    2505                 :          3 :         if (unlikely(PageSlabPfmemalloc(page)))
    2506                 :          0 :                 return gfp_pfmemalloc_allowed(gfpflags);
    2507                 :            : 
    2508                 :            :         return true;
    2509                 :            : }
    2510                 :            : 
    2511                 :            : /*
    2512                 :            :  * Check the page->freelist of a page and either transfer the freelist to the
    2513                 :            :  * per cpu freelist or deactivate the page.
    2514                 :            :  *
    2515                 :            :  * The page is still frozen if the return value is not NULL.
    2516                 :            :  *
    2517                 :            :  * If this function returns NULL then the page has been unfrozen.
    2518                 :            :  *
    2519                 :            :  * This function must be called with interrupts disabled.
    2520                 :            :  */
    2521                 :          3 : static inline void *get_freelist(struct kmem_cache *s, struct page *page)
    2522                 :            : {
    2523                 :            :         struct page new;
    2524                 :            :         unsigned long counters;
    2525                 :            :         void *freelist;
    2526                 :            : 
    2527                 :            :         do {
    2528                 :          3 :                 freelist = page->freelist;
    2529                 :          3 :                 counters = page->counters;
    2530                 :            : 
    2531                 :          3 :                 new.counters = counters;
    2532                 :            :                 VM_BUG_ON(!new.frozen);
    2533                 :            : 
    2534                 :          3 :                 new.inuse = page->objects;
    2535                 :          3 :                 new.frozen = freelist != NULL;
    2536                 :            : 
    2537                 :          3 :         } while (!__cmpxchg_double_slab(s, page,
    2538                 :            :                 freelist, counters,
    2539                 :            :                 NULL, new.counters,
    2540                 :          3 :                 "get_freelist"));
    2541                 :            : 
    2542                 :          3 :         return freelist;
    2543                 :            : }
    2544                 :            : 
    2545                 :            : /*
    2546                 :            :  * Slow path. The lockless freelist is empty or we need to perform
    2547                 :            :  * debugging duties.
    2548                 :            :  *
    2549                 :            :  * Processing is still very fast if new objects have been freed to the
    2550                 :            :  * regular freelist. In that case we simply take over the regular freelist
    2551                 :            :  * as the lockless freelist and zap the regular freelist.
    2552                 :            :  *
    2553                 :            :  * If that is not working then we fall back to the partial lists. We take the
    2554                 :            :  * first element of the freelist as the object to allocate now and move the
    2555                 :            :  * rest of the freelist to the lockless freelist.
    2556                 :            :  *
    2557                 :            :  * And if we were unable to get a new slab from the partial slab lists then
    2558                 :            :  * we need to allocate a new slab. This is the slowest path since it involves
    2559                 :            :  * a call to the page allocator and the setup of a new slab.
    2560                 :            :  *
    2561                 :            :  * Version of __slab_alloc to use when we know that interrupts are
    2562                 :            :  * already disabled (which is the case for bulk allocation).
    2563                 :            :  */
    2564                 :          3 : static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
    2565                 :            :                           unsigned long addr, struct kmem_cache_cpu *c)
    2566                 :            : {
    2567                 :            :         void *freelist;
    2568                 :            :         struct page *page;
    2569                 :            : 
    2570                 :          3 :         page = c->page;
    2571                 :          3 :         if (!page) {
    2572                 :            :                 /*
    2573                 :            :                  * if the node is not online or has no normal memory, just
    2574                 :            :                  * ignore the node constraint
    2575                 :            :                  */
    2576                 :          3 :                 if (unlikely(node != NUMA_NO_NODE &&
    2577                 :            :                              !node_state(node, N_NORMAL_MEMORY)))
    2578                 :            :                         node = NUMA_NO_NODE;
    2579                 :            :                 goto new_slab;
    2580                 :            :         }
    2581                 :            : redo:
    2582                 :            : 
    2583                 :            :         if (unlikely(!node_match(page, node))) {
    2584                 :            :                 /*
    2585                 :            :                  * same as above but node_match() being false already
    2586                 :            :                  * implies node != NUMA_NO_NODE
    2587                 :            :                  */
    2588                 :            :                 if (!node_state(node, N_NORMAL_MEMORY)) {
    2589                 :            :                         node = NUMA_NO_NODE;
    2590                 :            :                         goto redo;
    2591                 :            :                 } else {
    2592                 :            :                         stat(s, ALLOC_NODE_MISMATCH);
    2593                 :            :                         deactivate_slab(s, page, c->freelist, c);
    2594                 :            :                         goto new_slab;
    2595                 :            :                 }
    2596                 :            :         }
    2597                 :            : 
    2598                 :            :         /*
    2599                 :            :          * By rights, we should be searching for a slab page that was
    2600                 :            :          * PFMEMALLOC but right now, we are losing the pfmemalloc
    2601                 :            :          * information when the page leaves the per-cpu allocator
    2602                 :            :          */
    2603                 :          3 :         if (unlikely(!pfmemalloc_match(page, gfpflags))) {
    2604                 :          0 :                 deactivate_slab(s, page, c->freelist, c);
    2605                 :          0 :                 goto new_slab;
    2606                 :            :         }
    2607                 :            : 
    2608                 :            :         /* must check again c->freelist in case of cpu migration or IRQ */
    2609                 :          3 :         freelist = c->freelist;
    2610                 :          3 :         if (freelist)
    2611                 :            :                 goto load_freelist;
    2612                 :            : 
    2613                 :          3 :         freelist = get_freelist(s, page);
    2614                 :            : 
    2615                 :          3 :         if (!freelist) {
    2616                 :          3 :                 c->page = NULL;
    2617                 :            :                 stat(s, DEACTIVATE_BYPASS);
    2618                 :            :                 goto new_slab;
    2619                 :            :         }
    2620                 :            : 
    2621                 :            :         stat(s, ALLOC_REFILL);
    2622                 :            : 
    2623                 :            : load_freelist:
    2624                 :            :         /*
    2625                 :            :          * freelist is pointing to the list of objects to be used.
    2626                 :            :          * page is pointing to the page from which the objects are obtained.
    2627                 :            :          * That page must be frozen for per cpu allocations to work.
    2628                 :            :          */
    2629                 :            :         VM_BUG_ON(!c->page->frozen);
    2630                 :          3 :         c->freelist = get_freepointer(s, freelist);
    2631                 :          3 :         c->tid = next_tid(c->tid);
    2632                 :          3 :         return freelist;
    2633                 :            : 
    2634                 :            : new_slab:
    2635                 :            : 
    2636                 :          3 :         if (slub_percpu_partial(c)) {
    2637                 :          3 :                 page = c->page = slub_percpu_partial(c);
    2638                 :          3 :                 slub_set_percpu_partial(c, page);
    2639                 :            :                 stat(s, CPU_PARTIAL_ALLOC);
    2640                 :            :                 goto redo;
    2641                 :            :         }
    2642                 :            : 
    2643                 :          3 :         freelist = new_slab_objects(s, gfpflags, node, &c);
    2644                 :            : 
    2645                 :          3 :         if (unlikely(!freelist)) {
    2646                 :          0 :                 slab_out_of_memory(s, gfpflags, node);
    2647                 :          0 :                 return NULL;
    2648                 :            :         }
    2649                 :            : 
    2650                 :          3 :         page = c->page;
    2651                 :          3 :         if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
    2652                 :            :                 goto load_freelist;
    2653                 :            : 
    2654                 :            :         /* Only entered in the debug case */
    2655                 :          3 :         if (kmem_cache_debug(s) &&
    2656                 :          0 :                         !alloc_debug_processing(s, page, freelist, addr))
    2657                 :            :                 goto new_slab;  /* Slab failed checks. Next slab needed */
    2658                 :            : 
    2659                 :          3 :         deactivate_slab(s, page, get_freepointer(s, freelist), c);
    2660                 :          0 :         return freelist;
    2661                 :            : }
    2662                 :            : 
    2663                 :            : /*
    2664                 :            :  * Wrapper around ___slab_alloc() that runs it with interrupts disabled
    2665                 :            :  * and, under CONFIG_PREEMPT, refetches the per cpu area pointer first.
    2666                 :            :  */
    2667                 :          3 : static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
    2668                 :            :                           unsigned long addr, struct kmem_cache_cpu *c)
    2669                 :            : {
    2670                 :            :         void *p;
    2671                 :            :         unsigned long flags;
    2672                 :            : 
    2673                 :          3 :         local_irq_save(flags);
    2674                 :            : #ifdef CONFIG_PREEMPT
    2675                 :            :         /*
    2676                 :            :          * We may have been preempted and rescheduled on a different
    2677                 :            :          * cpu before interrupts were disabled, so the c passed in may be
    2678                 :            :          * stale. Reload the cpu area pointer for the current cpu.
    2679                 :            :          */
    2680                 :            :         c = this_cpu_ptr(s->cpu_slab);
    2681                 :            : #endif
    2682                 :            :         /* The result of ___slab_alloc() is returned unchanged. */
    2683                 :          3 :         p = ___slab_alloc(s, gfpflags, node, addr, c);
    2684                 :          3 :         local_irq_restore(flags);
    2685                 :          3 :         return p;
    2686                 :            : }
    2687                 :            : 
    2688                 :            : /*
    2689                 :            :  * If the object has been wiped upon free, make sure it's fully initialized by
    2690                 :            :  * zeroing out the freelist pointer at obj + s->offset. NULL obj is a no-op.
    2691                 :            :  */
    2692                 :            : static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
    2693                 :            :                                                    void *obj)
    2694                 :            : {
    2695                 :          3 :         if (unlikely(slab_want_init_on_free(s)) && obj)
    2696                 :          0 :                 memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
    2697                 :            : }
    2698                 :            : 
    2699                 :            : /*
    2700                 :            :  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
    2701                 :            :  * have the fastpath folded into their functions. So no function call
    2702                 :            :  * overhead for requests that can be satisfied on the fastpath.
    2703                 :            :  *
    2704                 :            :  * The fastpath works by first checking if the lockless freelist can be used.
    2705                 :            :  * If not then __slab_alloc is called for slow processing.
    2706                 :            :  * Otherwise we can simply pick the next object from the lockless free list.
    2707                 :            :  * Returns NULL when slab_pre_alloc_hook() hands back no cache to use.
    2708                 :            :  */
    2709                 :            : static __always_inline void *slab_alloc_node(struct kmem_cache *s,
    2710                 :            :                 gfp_t gfpflags, int node, unsigned long addr)
    2711                 :            : {
    2712                 :            :         void *object;
    2713                 :            :         struct kmem_cache_cpu *c;
    2714                 :            :         struct page *page;
    2715                 :            :         unsigned long tid;
    2716                 :            : 
    2717                 :          3 :         s = slab_pre_alloc_hook(s, gfpflags);
    2718                 :          3 :         if (!s)
    2719                 :            :                 return NULL;
    2720                 :            : redo:
    2721                 :            :         /*
    2722                 :            :          * Must read kmem_cache cpu data via this cpu ptr. Preemption is
    2723                 :            :          * enabled. We may switch back and forth between cpus while
    2724                 :            :          * reading from one cpu area. That does not matter as long
    2725                 :            :          * as we end up on the original cpu again when doing the cmpxchg.
    2726                 :            :          *
    2727                 :            :          * We should guarantee that tid and kmem_cache are retrieved on
    2728                 :            :          * the same cpu. They could differ if CONFIG_PREEMPT, so we need
    2729                 :            :          * to check whether they match and reread them if they do not.
    2730                 :            :          */
    2731                 :            :         do {
    2732                 :          3 :                 tid = this_cpu_read(s->cpu_slab->tid);
    2733                 :          3 :                 c = raw_cpu_ptr(s->cpu_slab);
    2734                 :            :         } while (IS_ENABLED(CONFIG_PREEMPT) &&
    2735                 :            :                  unlikely(tid != READ_ONCE(c->tid)));
    2736                 :            : 
    2737                 :            :         /*
    2738                 :            :          * Irqless object alloc/free algorithm used here depends on sequence
    2739                 :            :          * of fetching cpu_slab's data. tid should be fetched before anything
    2740                 :            :          * on c to guarantee that object and page associated with previous tid
    2741                 :            :          * won't be used with current tid. If we fetch tid first, object and
    2742                 :            :          * page could be ones associated with the next tid and our alloc/free
    2743                 :            :          * request will fail. In this case, we will retry. So, no problem.
    2744                 :            :          */
    2745                 :          3 :         barrier();
    2746                 :            : 
    2747                 :            :         /*
    2748                 :            :          * The transaction ids are globally unique per cpu and per operation on
    2749                 :            :          * a per cpu queue. Thus they can guarantee that the cmpxchg_double
    2750                 :            :          * occurs on the right processor and that there was no operation on the
    2751                 :            :          * linked list in between.
    2752                 :            :          */
    2753                 :            : 
    2754                 :          3 :         object = c->freelist;
    2755                 :          3 :         page = c->page;
    2756                 :          3 :         if (unlikely(!object || !node_match(page, node))) {
    2757                 :          3 :                 object = __slab_alloc(s, gfpflags, node, addr, c);
    2758                 :            :                 stat(s, ALLOC_SLOWPATH);
    2759                 :            :         } else {
    2760                 :          3 :                 void *next_object = get_freepointer_safe(s, object);
    2761                 :            : 
    2762                 :            :                 /*
    2763                 :            :                  * The cmpxchg will only match if there was no additional
    2764                 :            :                  * operation and if we are on the right processor.
    2765                 :            :                  *
    2766                 :            :                  * The cmpxchg does the following atomically (without lock
    2767                 :            :                  * semantics!)
    2768                 :            :                  * 1. Relocate first pointer to the current per cpu area.
    2769                 :            :                  * 2. Verify that tid and freelist have not been changed
    2770                 :            :                  * 3. If they were not changed replace tid and freelist
    2771                 :            :                  *
    2772                 :            :                  * Since this is without lock semantics the protection is only
    2773                 :            :                  * against code executing on this cpu *not* from access by
    2774                 :            :                  * other cpus.
    2775                 :            :                  */
    2776                 :          3 :                 if (unlikely(!this_cpu_cmpxchg_double(
    2777                 :            :                                 s->cpu_slab->freelist, s->cpu_slab->tid,
    2778                 :            :                                 object, tid,
    2779                 :            :                                 next_object, next_tid(tid)))) {
    2780                 :            : 
    2781                 :            :                         note_cmpxchg_failure("slab_alloc", s, tid);
    2782                 :            :                         goto redo;
    2783                 :            :                 }
    2784                 :          3 :                 prefetch_freepointer(s, next_object);
    2785                 :            :                 stat(s, ALLOC_FASTPATH);
    2786                 :            :         }
    2787                 :            : 
    2788                 :          3 :         maybe_wipe_obj_freeptr(s, object);
    2789                 :            :         /* init-on-alloc: zero s->object_size bytes of a non-NULL object. */
    2790                 :          3 :         if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
    2791                 :          3 :                 memset(object, 0, s->object_size);
    2792                 :            : 
    2793                 :          3 :         slab_post_alloc_hook(s, gfpflags, 1, &object);
    2794                 :            : 
    2795                 :          3 :         return object;
    2796                 :            : }
    2797                 :            : 
    2798                 :            : static __always_inline void *slab_alloc(struct kmem_cache *s,
    2799                 :            :                 gfp_t gfpflags, unsigned long addr)
    2800                 :            : { /* slab_alloc_node() with node fixed to NUMA_NO_NODE */
    2801                 :            :         return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
    2802                 :            : }
    2803                 :            : 
    2804                 :          3 : void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
    2805                 :            : { /* Allocate one object from cache s; exported entry point. */
    2806                 :          3 :         void *ret = slab_alloc(s, gfpflags, _RET_IP_);
    2807                 :            :         /* Hand ret, both cache sizes and the flags to the trace hook. */
    2808                 :          3 :         trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
    2809                 :            :                                 s->size, gfpflags);
    2810                 :            : 
    2811                 :          3 :         return ret;
    2812                 :            : }
    2813                 :            : EXPORT_SYMBOL(kmem_cache_alloc);
    2814                 :            : 
    2815                 :            : #ifdef CONFIG_TRACING
    2816                 :          3 : void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
    2817                 :            : { /* Allocate from s; size only feeds trace_kmalloc() and kasan_kmalloc(). */
    2818                 :          3 :         void *ret = slab_alloc(s, gfpflags, _RET_IP_);
    2819                 :          3 :         trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
    2820                 :            :         ret = kasan_kmalloc(s, ret, size, gfpflags);
    2821                 :          3 :         return ret;
    2822                 :            : }
    2823                 :            : EXPORT_SYMBOL(kmem_cache_alloc_trace);
    2824                 :            : #endif
    2825                 :            : 
    2826                 :            : #ifdef CONFIG_NUMA
    2827                 :            : void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
    2828                 :            : { /* Allocate one object from s, passing node on to slab_alloc_node(). */
    2829                 :            :         void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
    2830                 :            :         /* The trace hook additionally receives the requested node. */
    2831                 :            :         trace_kmem_cache_alloc_node(_RET_IP_, ret,
    2832                 :            :                                     s->object_size, s->size, gfpflags, node);
    2833                 :            : 
    2834                 :            :         return ret;
    2835                 :            : }
    2836                 :            : EXPORT_SYMBOL(kmem_cache_alloc_node);
    2837                 :            : 
    2838                 :            : #ifdef CONFIG_TRACING
    2839                 :            : void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
    2840                 :            :                                     gfp_t gfpflags,
    2841                 :            :                                     int node, size_t size)
    2842                 :            : { /* Node-specific allocation from s, traced via trace_kmalloc_node(). */
    2843                 :            :         void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
    2844                 :            :         /* Trace with the caller's size rather than s->object_size. */
    2845                 :            :         trace_kmalloc_node(_RET_IP_, ret,
    2846                 :            :                            size, s->size, gfpflags, node);
    2847                 :            :         /* The pointer handed back by kasan_kmalloc() is what gets returned. */
    2848                 :            :         ret = kasan_kmalloc(s, ret, size, gfpflags);
    2849                 :            :         return ret;
    2850                 :            : }
    2851                 :            : EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
    2852                 :            : #endif
    2853                 :            : #endif  /* CONFIG_NUMA */
    2854                 :            : 
    2855                 :            : /*
    2856                 :            :  * Slow path handling. This may still be called frequently since objects
    2857                 :            :  * have a longer lifetime than the cpu slabs in most processing loads.
    2858                 :            :  *
    2859                 :            :  * So we still attempt to reduce cache line usage. Just take the slab
    2860                 :            :  * lock and free the item. If there is no additional partial page
    2861                 :            :  * handling required then we can return immediately.
    2862                 :            :  */
    2863                 :          3 : static void __slab_free(struct kmem_cache *s, struct page *page,
    2864                 :            :                         void *head, void *tail, int cnt,
    2865                 :            :                         unsigned long addr)
    2866                 :            : /* head..tail is a chain of cnt objects in page; addr is for debug checks. */
    2867                 :            : {
    2868                 :            :         void *prior;
    2869                 :            :         int was_frozen;
    2870                 :            :         struct page new;
    2871                 :            :         unsigned long counters;
    2872                 :            :         struct kmem_cache_node *n = NULL;
    2873                 :            :         unsigned long uninitialized_var(flags);
    2874                 :            : 
    2875                 :            :         stat(s, FREE_SLOWPATH);
    2876                 :            : 
    2877                 :          3 :         if (kmem_cache_debug(s) &&
    2878                 :          0 :             !free_debug_processing(s, page, head, tail, cnt, addr))
    2879                 :          3 :                 return;
    2880                 :            :         /* Retry until freelist and counters are swapped in one cmpxchg. */
    2881                 :            :         do {
    2882                 :          3 :                 if (unlikely(n)) { /* lock kept from a failed attempt */
    2883                 :            :                         spin_unlock_irqrestore(&n->list_lock, flags);
    2884                 :            :                         n = NULL;
    2885                 :            :                 }
    2886                 :          3 :                 prior = page->freelist;
    2887                 :          3 :                 counters = page->counters;
    2888                 :            :                 set_freepointer(s, tail, prior);
    2889                 :          3 :                 new.counters = counters;
    2890                 :          3 :                 was_frozen = new.frozen;
    2891                 :          3 :                 new.inuse -= cnt;
    2892                 :          3 :                 if ((!new.inuse || !prior) && !was_frozen) {
    2893                 :            : 
    2894                 :          3 :                         if (kmem_cache_has_cpu_partial(s) && !prior) {
    2895                 :            : 
    2896                 :            :                                 /*
    2897                 :            :                                  * Slab was on no list before and will be
    2898                 :            :                                  * partially empty.
    2899                 :            :                                  * We can defer the list move and instead
    2900                 :            :                                  * freeze it.
    2901                 :            :                                  */
    2902                 :          3 :                                 new.frozen = 1;
    2903                 :            : 
    2904                 :            :                         } else { /* Needs to be taken off a list */
    2905                 :            : 
    2906                 :            :                                 n = get_node(s, page_to_nid(page));
    2907                 :            :                                 /*
    2908                 :            :                                  * Speculatively acquire the list_lock.
    2909                 :            :                                  * If the cmpxchg does not succeed then we may
    2910                 :            :                                  * drop the list_lock without any processing.
    2911                 :            :                                  *
    2912                 :            :                                  * Otherwise the list_lock will synchronize with
    2913                 :            :                                  * other processors updating the list of slabs.
    2914                 :            :                                  */
    2915                 :          3 :                                 spin_lock_irqsave(&n->list_lock, flags);
    2916                 :            : 
    2917                 :            :                         }
    2918                 :            :                 }
    2919                 :            : 
    2920                 :          3 :         } while (!cmpxchg_double_slab(s, page,
    2921                 :            :                 prior, counters,
    2922                 :            :                 head, new.counters,
    2923                 :          3 :                 "__slab_free"));
    2924                 :            : 
    2925                 :          3 :         if (likely(!n)) {
    2926                 :            : 
    2927                 :            :                 /*
    2928                 :            :                  * If we just froze the page then put it onto the
    2929                 :            :                  * per cpu partial list.
    2930                 :            :                  */
    2931                 :          3 :                 if (new.frozen && !was_frozen) {
    2932                 :          3 :                         put_cpu_partial(s, page, 1);
    2933                 :            :                         stat(s, CPU_PARTIAL_FREE);
    2934                 :            :                 }
    2935                 :            :                 /*
    2936                 :            :                  * The list lock was not taken therefore no list
    2937                 :            :                  * activity can be necessary.
    2938                 :            :                  */
    2939                 :            :                 if (was_frozen)
    2940                 :            :                         stat(s, FREE_FROZEN);
    2941                 :            :                 return;
    2942                 :            :         }
    2943                 :            :         /* n is set, so n->list_lock is held from here on. */
    2944                 :          3 :         if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
    2945                 :            :                 goto slab_empty;
    2946                 :            : 
    2947                 :            :         /*
    2948                 :            :          * Objects left in the slab. If it was not on the partial list before
    2949                 :            :          * then add it.
    2950                 :            :          */
    2951                 :          3 :         if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
    2952                 :            :                 remove_full(s, n, page);
    2953                 :            :                 add_partial(n, page, DEACTIVATE_TO_TAIL);
    2954                 :            :                 stat(s, FREE_ADD_PARTIAL);
    2955                 :            :         }
    2956                 :            :         spin_unlock_irqrestore(&n->list_lock, flags);
    2957                 :            :         return;
    2958                 :            : 
    2959                 :            : slab_empty:
    2960                 :          3 :         if (prior) {
    2961                 :            :                 /*
    2962                 :            :                  * Slab on the partial list.
    2963                 :            :                  */
    2964                 :            :                 remove_partial(n, page);
    2965                 :            :                 stat(s, FREE_REMOVE_PARTIAL);
    2966                 :            :         } else {
    2967                 :            :                 /* Slab must be on the full list */
    2968                 :            :                 remove_full(s, n, page);
    2969                 :            :         }
    2970                 :            :         /* Unlock before the empty slab is handed to discard_slab(). */
    2971                 :            :         spin_unlock_irqrestore(&n->list_lock, flags);
    2972                 :            :         stat(s, FREE_SLAB);
    2973                 :          3 :         discard_slab(s, page);
    2974                 :            : }
    2975                 :            : 
    2976                 :            : /*
    2977                 :            :  * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
    2978                 :            :  * can perform fastpath freeing without additional function calls.
    2979                 :            :  *
    2980                 :            :  * The fastpath is only possible if we are freeing to the current cpu slab
    2981                 :            :  * of this processor. This is typically the case if we have just allocated
    2982                 :            :  * the item before.
    2983                 :            :  *
    2984                 :            :  * If fastpath is not possible then fall back to __slab_free where we deal
    2985                 :            :  * with all sorts of special processing.
    2986                 :            :  *
    2987                 :            :  * Bulk free of a freelist with several objects (all pointing to the
    2988                 :            :  * same page) is possible by specifying head and tail ptr, plus objects
    2989                 :            :  * count (cnt). Bulk free is indicated by the tail pointer being set.
    2990                 :            :  */
    2991                 :            : static __always_inline void do_slab_free(struct kmem_cache *s,
    2992                 :            :                                 struct page *page, void *head, void *tail,
    2993                 :            :                                 int cnt, unsigned long addr)
    2994                 :            : {
    2995                 :          3 :         void *tail_obj = tail ? : head; /* single object: tail_obj is head */
    2996                 :            :         struct kmem_cache_cpu *c;
    2997                 :            :         unsigned long tid;
    2998                 :            : redo:
    2999                 :            :         /*
    3000                 :            :          * Determine the current cpu's per cpu slab.
    3001                 :            :          * The cpu may change afterward. However that does not matter since
    3002                 :            :          * data is retrieved via this pointer. If we are on the same cpu
    3003                 :            :          * during the cmpxchg then the free will succeed.
    3004                 :            :          */
    3005                 :            :         do {
    3006                 :          3 :                 tid = this_cpu_read(s->cpu_slab->tid);
    3007                 :          3 :                 c = raw_cpu_ptr(s->cpu_slab);
    3008                 :            :         } while (IS_ENABLED(CONFIG_PREEMPT) &&
    3009                 :            :                  unlikely(tid != READ_ONCE(c->tid)));
    3010                 :            : 
    3011                 :            :         /* Same as the comment on barrier() in slab_alloc_node() */
    3012                 :          3 :         barrier();
    3013                 :            : 
    3014                 :          3 :         if (likely(page == c->page)) {
    3015                 :            :                 void **freelist = READ_ONCE(c->freelist);
    3016                 :            :                 /* Chain the current cpu freelist behind the freed object(s). */
    3017                 :          3 :                 set_freepointer(s, tail_obj, freelist);
    3018                 :            : 
    3019                 :          3 :                 if (unlikely(!this_cpu_cmpxchg_double(
    3020                 :            :                                 s->cpu_slab->freelist, s->cpu_slab->tid,
    3021                 :            :                                 freelist, tid,
    3022                 :            :                                 head, next_tid(tid)))) {
    3023                 :            : 
    3024                 :            :                         note_cmpxchg_failure("slab_free", s, tid);
    3025                 :            :                         goto redo;
    3026                 :            :                 }
    3027                 :            :                 stat(s, FREE_FASTPATH);
    3028                 :            :         } else
    3029                 :          3 :                 __slab_free(s, page, head, tail_obj, cnt, addr);
    3030                 :            : 
    3031                 :            : }
    3032                 :            : 
    3033                 :            : static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
    3034                 :            :                                       void *head, void *tail, int cnt,
    3035                 :            :                                       unsigned long addr)
    3036                 :            : {
    3037                 :            :         /*
    3038                 :            :          * With KASAN enabled slab_free_freelist_hook modifies the freelist to
    3039                 :            :          * remove objects whose reuse must be delayed; free only if it returns true.
    3040                 :            :          */
    3041                 :          3 :         if (slab_free_freelist_hook(s, &head, &tail))
    3042                 :          3 :                 do_slab_free(s, page, head, tail, cnt, addr);
    3043                 :            : }
    3044                 :            : 
    3045                 :            : #ifdef CONFIG_KASAN_GENERIC
    3046                 :            : void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
    3047                 :            : { /* Frees the single object x via do_slab_free(); no freelist hook is run. */
    3048                 :            :         do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
    3049                 :            : }
    3050                 :            : #endif
    3051                 :            : 
    3052                 :          3 : void kmem_cache_free(struct kmem_cache *s, void *x)
    3053                 :            : { /* Free object x; s is first resolved through cache_from_obj(). */
    3054                 :          3 :         s = cache_from_obj(s, x);
    3055                 :          3 :         if (!s)
    3056                 :          3 :                 return; /* no cache resolved: nothing freed, no trace */
    3057                 :          3 :         slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
    3058                 :          3 :         trace_kmem_cache_free(_RET_IP_, x);
    3059                 :            : }
    3060                 :            : EXPORT_SYMBOL(kmem_cache_free);
    3061                 :            : 
    3062                 :            : struct detached_freelist { /* filled in by build_detached_freelist() */
    3063                 :            :         struct page *page; /* reset to NULL at the start of each build */
    3064                 :            :         void *tail; /* NOTE(review): presumably last object of the list - confirm */
    3065                 :            :         void *freelist; /* NOTE(review): presumably first object of the list - confirm */
    3066                 :            :         int cnt; /* NOTE(review): presumably number of objects listed - confirm */
    3067                 :            :         struct kmem_cache *s; /* NOTE(review): presumably the objects' cache - confirm */
    3068                 :            : };
    3069                 :            : 
    3070                 :            : /*
    3071                 :            :  * This function progressively scans the array with free objects (with
    3072                 :            :  * a limited look ahead) and extracts objects belonging to the same
    3073                 :            :  * page.  It builds a detached freelist directly within the given
    3074                 :            :  * page/objects.  This can happen without any need for
    3075                 :            :  * synchronization, because the objects are owned by the running process.
    3076                 :            :  * The freelist is built up as a singly linked list in the objects.
    3077                 :            :  * The idea is, that this detached freelist can then be bulk
    3078                 :            :  * transferred to the real freelist(s), but only requiring a single
    3079                 :            :  * synchronization primitive.  Look ahead in the array is limited due
    3080                 :            :  * to performance reasons.
    3081                 :            :  */
    3082                 :            : static inline
    3083                 :          0 : int build_detached_freelist(struct kmem_cache *s, size_t size,
    3084                 :            :                             void **p, struct detached_freelist *df)
    3085                 :            : {
    3086                 :            :         size_t first_skipped_index = 0;
    3087                 :            :         int lookahead = 3;
    3088                 :            :         void *object;
    3089                 :            :         struct page *page;
    3090                 :            : 
    3091                 :            :         /* Always re-init detached_freelist */
    3092                 :          0 :         df->page = NULL;
    3093                 :            : 
    3094                 :            :         do {
    3095                 :          0 :                 object = p[--size];
    3096                 :            :                 /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
    3097                 :          0 :         } while (!object && size);
    3098                 :            : 
    3099                 :          0 :         if (!object)
    3100                 :            :                 return 0;
    3101                 :            : 
    3102                 :            :         page = virt_to_head_page(object);
    3103                 :          0 :         if (!s) {
    3104                 :            :                 /* Handle kalloc'ed objects */
    3105                 :          0 :                 if (unlikely(!PageSlab(page))) {
    3106                 :          0 :                         BUG_ON(!PageCompound(page));
    3107                 :            :                         kfree_hook(object);
    3108                 :          0 :                         __free_pages(page, compound_order(page));
    3109                 :          0 :                         p[size] = NULL; /* mark object processed */
    3110                 :          0 :                         return size;
    3111                 :            :                 }
    3112                 :            :                 /* Derive kmem_cache from object */
    3113                 :          0 :                 df->s = page->slab_cache;
    3114                 :            :         } else {
    3115                 :          0 :                 df->s = cache_from_obj(s, object); /* Support for memcg */
    3116                 :            :         }
    3117                 :            : 
    3118                 :            :         /* Start new detached freelist */
    3119                 :          0 :         df->page = page;
    3120                 :          0 :         set_freepointer(df->s, object, NULL);
    3121                 :          0 :         df->tail = object;
    3122                 :          0 :         df->freelist = object;
    3123                 :          0 :         p[size] = NULL; /* mark object processed */
    3124                 :          0 :         df->cnt = 1;
    3125                 :            : 
    3126                 :          0 :         while (size) {
    3127                 :          0 :                 object = p[--size];
    3128                 :          0 :                 if (!object)
    3129                 :          0 :                         continue; /* Skip processed objects */
    3130                 :            : 
    3131                 :            :                 /* df->page is always set at this point */
    3132                 :          0 :                 if (df->page == virt_to_head_page(object)) {
    3133                 :            :                         /* Opportunity build freelist */
    3134                 :          0 :                         set_freepointer(df->s, object, df->freelist);
    3135                 :          0 :                         df->freelist = object;
    3136                 :          0 :                         df->cnt++;
    3137                 :          0 :                         p[size] = NULL; /* mark object processed */
    3138                 :            : 
    3139                 :          0 :                         continue;
    3140                 :            :                 }
    3141                 :            : 
    3142                 :            :                 /* Limit look ahead search */
    3143                 :          0 :                 if (!--lookahead)
    3144                 :            :                         break;
    3145                 :            : 
    3146                 :          0 :                 if (!first_skipped_index)
    3147                 :            :                         first_skipped_index = size + 1;
    3148                 :            :         }
    3149                 :            : 
    3150                 :          0 :         return first_skipped_index;
    3151                 :            : }
    3152                 :            : 
    3153                 :            : /*
                         :            :  * kmem_cache_free_bulk - free an array of objects
                         :            :  * @s:    cache passed through to build_detached_freelist(); that helper
                         :            :  *        also accepts NULL and then takes the cache from each object's
                         :            :  *        page
                         :            :  * @size: number of entries in @p; a WARN is raised and nothing is done
                         :            :  *        when it is 0
                         :            :  * @p:    array of objects; entries are set to NULL by
                         :            :  *        build_detached_freelist() as they are consumed
                         :            :  *
                         :            :  * Each pass groups objects that share a page into one detached freelist
                         :            :  * and releases the group with a single slab_free() call. The loop ends
                         :            :  * once build_detached_freelist() returns 0.
                         :            :  *
                         :            :  * Note that interrupts must be enabled when calling this function.
                         :            :  */
    3154                 :          0 : void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
    3155                 :            : {
    3156                 :          0 :         if (WARN_ON(!size))
    3157                 :          0 :                 return;
    3158                 :            : 
    3159                 :            :         do {
    3160                 :            :                 struct detached_freelist df;
    3161                 :            : 
    3162                 :          0 :                 size = build_detached_freelist(s, size, p, &df);
    3163                 :          0 :                 if (!df.page)
    3164                 :          0 :                         continue;       /* no freelist was built this pass */
    3165                 :            : 
    3166                 :          0 :                 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
    3167                 :          0 :         } while (likely(size));
    3168                 :            : }
    3169                 :            : EXPORT_SYMBOL(kmem_cache_free_bulk);
    3170                 :            : 
    3171                 :            : /*
                         :            :  * kmem_cache_alloc_bulk - allocate @size objects from @s into @p
                         :            :  * @s:     cache to allocate from (may be replaced by
                         :            :  *         slab_pre_alloc_hook())
                         :            :  * @flags: allocation flags
                         :            :  * @size:  number of objects requested
                         :            :  * @p:     array receiving the object pointers
                         :            :  *
                         :            :  * Return: the number of objects stored in @p (equal to @size) on
                         :            :  * success, 0 on failure. On failure every object obtained so far is
                         :            :  * handed back with __kmem_cache_free_bulk(), so the caller owns nothing.
                         :            :  *
                         :            :  * Note that interrupts must be enabled when calling this function.
                         :            :  */
    3172                 :          0 : int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
    3173                 :            :                           void **p)
    3174                 :            : {
    3175                 :            :         struct kmem_cache_cpu *c;
    3176                 :            :         int i;
    3177                 :            : 
    3178                 :            :         /* memcg and kmem_cache debug support */
    3179                 :          0 :         s = slab_pre_alloc_hook(s, flags);
    3180                 :          0 :         if (unlikely(!s))
    3181                 :            :                 return 0;       /* int return: same failure value as the error path below */
    3182                 :            :         /*
    3183                 :            :          * Drain objects in the per cpu slab, while disabling local
    3184                 :            :          * IRQs, which protects against preemption and interrupt
    3185                 :            :          * handlers invoking the normal fastpath.
    3186                 :            :          */
    3187                 :          0 :         local_irq_disable();
    3188                 :          0 :         c = this_cpu_ptr(s->cpu_slab);
    3189                 :            : 
    3190                 :          0 :         for (i = 0; i < size; i++) {
    3191                 :          0 :                 void *object = c->freelist;
    3192                 :            : 
    3193                 :          0 :                 if (unlikely(!object)) {
    3194                 :            :                         /*
    3195                 :            :                          * We may have removed an object from c->freelist using
    3196                 :            :                          * the fastpath in the previous iteration; in that case,
    3197                 :            :                          * c->tid has not been bumped yet.
    3198                 :            :                          * Since ___slab_alloc() may reenable interrupts while
    3199                 :            :                          * allocating memory, we should bump c->tid now.
    3200                 :            :                          */
    3201                 :          0 :                         c->tid = next_tid(c->tid);
    3202                 :            : 
    3203                 :            :                         /*
    3204                 :            :                          * Invoking the slow path likely has the side effect
    3205                 :            :                          * of re-populating the per CPU c->freelist
    3206                 :            :                          */
    3207                 :          0 :                         p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
    3208                 :          0 :                                             _RET_IP_, c);
    3209                 :          0 :                         if (unlikely(!p[i]))
    3210                 :            :                                 goto error;
    3211                 :            : 
    3212                 :          0 :                         c = this_cpu_ptr(s->cpu_slab);  /* re-read the per cpu pointer after the slow path */
    3213                 :            :                         maybe_wipe_obj_freeptr(s, p[i]);
    3214                 :            : 
    3215                 :          0 :                         continue; /* goto for-loop */
    3216                 :            :                 }
    3217                 :          0 :                 c->freelist = get_freepointer(s, object);
    3218                 :          0 :                 p[i] = object;
    3219                 :            :                 maybe_wipe_obj_freeptr(s, p[i]);
    3220                 :            :         }
    3221                 :          0 :         c->tid = next_tid(c->tid);
    3222                 :          0 :         local_irq_enable();
    3223                 :            : 
    3224                 :            :         /* Clear memory outside IRQ disabled fastpath loop */
    3225                 :          0 :         if (unlikely(slab_want_init_on_alloc(flags, s))) {
    3226                 :            :                 int j;
    3227                 :            : 
    3228                 :          0 :                 for (j = 0; j < i; j++)
    3229                 :          0 :                         memset(p[j], 0, s->object_size);
    3230                 :            :         }
    3231                 :            : 
    3232                 :            :         /* memcg and kmem_cache debug support */
    3233                 :            :         slab_post_alloc_hook(s, flags, size, p);
    3234                 :          0 :         return i;
    3235                 :            : error:
    3236                 :          0 :         local_irq_enable();
    3237                 :            :         slab_post_alloc_hook(s, flags, i, p);   /* only the first i entries of p are valid */
    3238                 :          0 :         __kmem_cache_free_bulk(s, i, p);
    3239                 :          0 :         return 0;
    3240                 :            : }
    3241                 :            : EXPORT_SYMBOL(kmem_cache_alloc_bulk);
    3242                 :            : 
    3243                 :            : 
    3244                 :            : /*
    3245                 :            :  * Object placement in a slab is made very easy because we always start at
    3246                 :            :  * offset 0. If we tune the size of the object to the alignment then we can
    3247                 :            :  * get the required alignment by putting one properly sized object after
    3248                 :            :  * another.
    3249                 :            :  *
    3250                 :            :  * Notice that the allocation order determines the sizes of the per cpu
    3251                 :            :  * caches. Each processor has always one slab available for allocations.
    3252                 :            :  * Increasing the allocation order reduces the number of times that slabs
    3253                 :            :  * must be moved on and off the partial lists and is therefore a factor in
    3254                 :            :  * locking overhead.
    3255                 :            :  */
    3256                 :            : 
    3257                 :            : /*
    3258                 :            :  * Minimum / Maximum order of slab pages. This influences locking overhead
    3259                 :            :  * and slab fragmentation. A higher order reduces the number of partial slabs
    3260                 :            :  * and increases the number of allocations possible without having to
    3261                 :            :  * take the list_lock.
                         :            :  *
                         :            :  * slub_min_objects is the wanted minimum number of objects per slab; while
                         :            :  * it is 0, calculate_order() derives a value from nr_cpu_ids instead.
    3262                 :            :  */
    3263                 :            : static unsigned int slub_min_order;
    3264                 :            : static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
    3265                 :            : static unsigned int slub_min_objects;
    3266                 :            : 
    3267                 :            : /*
    3268                 :            :  * Calculate the order of allocation given a slab object size.
    3269                 :            :  *
    3270                 :            :  * The order of allocation has significant impact on performance and other
    3271                 :            :  * system components. Generally order 0 allocations should be preferred since
    3272                 :            :  * order 0 does not cause fragmentation in the page allocator. Larger objects
    3273                 :            :  * may be problematic to put into order 0 slabs because there may be too much
    3274                 :            :  * unused space left. We go to a higher order if more than 1/16th of the slab
    3275                 :            :  * would be wasted.
    3276                 :            :  *
    3277                 :            :  * In order to reach satisfactory performance we must ensure that a minimum
    3278                 :            :  * number of objects is in one slab. Otherwise we may generate too much
    3279                 :            :  * activity on the partial lists which requires taking the list_lock. This is
    3280                 :            :  * less a concern for large slabs though which are rarely used.
    3281                 :            :  *
    3282                 :            :  * slub_max_order specifies the order where we begin to stop considering the
    3283                 :            :  * number of objects in a slab as critical. If we reach slub_max_order then
    3284                 :            :  * we try to keep the page order as low as possible. So we accept more waste
    3285                 :            :  * of space in favor of a small page order.
    3286                 :            :  *
    3287                 :            :  * Higher order allocations also allow the placement of more objects in a
    3288                 :            :  * slab and thereby reduce object handling overhead. If the user has
    3289                 :            :  * requested a higher minimum order then we start with that one instead of
    3290                 :            :  * the smallest order which will fit the object.
                         :            :  *
                         :            :  * @size:           object size in bytes
                         :            :  * @min_objects:    number of objects the slab should at least hold; sets
                         :            :  *                  the first order tried, together with slub_min_order
                         :            :  * @max_order:      highest order the search will try
                         :            :  * @fract_leftover: an order is accepted once the unused remainder is at
                         :            :  *                  most slab size / @fract_leftover
                         :            :  *
                         :            :  * Return: the first acceptable order. If no order up to @max_order
                         :            :  * qualifies, the value returned is greater than @max_order, which is how
                         :            :  * calculate_order() detects failure. If even slub_min_order would hold
                         :            :  * more than MAX_OBJS_PER_PAGE objects, the result is
                         :            :  * get_order(size * MAX_OBJS_PER_PAGE) - 1 and no search is done.
    3291                 :            :  */
    3292                 :          3 : static inline unsigned int slab_order(unsigned int size,
    3293                 :            :                 unsigned int min_objects, unsigned int max_order,
    3294                 :            :                 unsigned int fract_leftover)
    3295                 :            : {
    3296                 :          3 :         unsigned int min_order = slub_min_order;
    3297                 :            :         unsigned int order;
    3298                 :            : 
    3299                 :          3 :         if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
    3300                 :          0 :                 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
    3301                 :            : 
    3302                 :          3 :         for (order = max(min_order, (unsigned int)get_order(min_objects * size));
    3303                 :          0 :                         order <= max_order; order++) {
    3304                 :            : 
    3305                 :          3 :                 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
    3306                 :            :                 unsigned int rem;
    3307                 :            : 
    3308                 :          3 :                 rem = slab_size % size; /* bytes left unused after packing whole objects */
    3309                 :            : 
    3310                 :          3 :                 if (rem <= slab_size / fract_leftover)
    3311                 :            :                         break;
    3312                 :            :         }
    3313                 :            : 
    3314                 :          3 :         return order;
    3315                 :            : }
    3316                 :            : /*
                         :            :  * Pick the page order for slabs holding objects of @size bytes.
                         :            :  *
                         :            :  * The wanted object count starts at slub_min_objects, or at
                         :            :  * 4 * (fls(nr_cpu_ids) + 1) when that is 0, and is capped by the number
                         :            :  * of objects that fit at slub_max_order. For each count from there down
                         :            :  * to 2, slab_order() is tried with leftover fractions 16, 8 and 4; the
                         :            :  * first result not above slub_max_order wins. After that a single object
                         :            :  * per slab is tried with fraction 1, first limited to slub_max_order and
                         :            :  * then to MAX_ORDER.
                         :            :  *
                         :            :  * Return: the chosen order, or -ENOSYS if even the last attempt does not
                         :            :  * yield an order below MAX_ORDER.
                         :            :  */
    3317                 :          3 : static inline int calculate_order(unsigned int size)
    3318                 :            : {
    3319                 :            :         unsigned int order;
    3320                 :            :         unsigned int min_objects;
    3321                 :            :         unsigned int max_objects;
    3322                 :            : 
    3323                 :            :         /*
    3324                 :            :          * Attempt to find best configuration for a slab. This
    3325                 :            :          * works by first attempting to generate a layout with
    3326                 :            :          * the best configuration and backing off gradually.
    3327                 :            :          *
    3328                 :            :          * First we increase the acceptable waste in a slab. Then
    3329                 :            :          * we reduce the minimum objects required in a slab.
    3330                 :            :          */
    3331                 :          3 :         min_objects = slub_min_objects;
    3332                 :          3 :         if (!min_objects)
    3333                 :          3 :                 min_objects = 4 * (fls(nr_cpu_ids) + 1);
    3334                 :          3 :         max_objects = order_objects(slub_max_order, size);
    3335                 :          3 :         min_objects = min(min_objects, max_objects);
    3336                 :            : 
    3337                 :          3 :         while (min_objects > 1) {
    3338                 :            :                 unsigned int fraction;
    3339                 :            : 
    3340                 :            :                 fraction = 16;
    3341                 :          3 :                 while (fraction >= 4) {
    3342                 :          3 :                         order = slab_order(size, min_objects,
    3343                 :            :                                         slub_max_order, fraction);
    3344                 :          3 :                         if (order <= slub_max_order)
    3345                 :          3 :                                 return order;
    3346                 :          0 :                         fraction /= 2;  /* tolerate twice as much leftover next time */
    3347                 :            :                 }
    3348                 :          0 :                 min_objects--;
    3349                 :            :         }
    3350                 :            : 
    3351                 :            :         /*
    3352                 :            :          * We were unable to place multiple objects in a slab. Now
    3353                 :            :          * let's see if we can place a single object there.
    3354                 :            :          */
    3355                 :          0 :         order = slab_order(size, 1, slub_max_order, 1);
    3356                 :          0 :         if (order <= slub_max_order)
    3357                 :          0 :                 return order;
    3358                 :            : 
    3359                 :            :         /*
    3360                 :            :          * Doh this slab cannot be placed using slub_max_order.
    3361                 :            :          */
    3362                 :          0 :         order = slab_order(size, 1, MAX_ORDER, 1);
    3363                 :          0 :         if (order < MAX_ORDER)
    3364                 :          0 :                 return order;
    3365                 :            :         return -ENOSYS;
    3366                 :            : }
    3367                 :            : /*
                         :            :  * Put a freshly obtained kmem_cache_node into its empty state: no
                         :            :  * partial slabs, an initialised list_lock and an empty partial list.
                         :            :  * With CONFIG_SLUB_DEBUG the slab and object counters are also zeroed
                         :            :  * and the full list is initialised.
                         :            :  */
    3368                 :            : static void
    3369                 :            : init_kmem_cache_node(struct kmem_cache_node *n)
    3370                 :            : {
    3371                 :          3 :         n->nr_partial = 0;
    3372                 :          3 :         spin_lock_init(&n->list_lock);
    3373                 :          3 :         INIT_LIST_HEAD(&n->partial);
    3374                 :            : #ifdef CONFIG_SLUB_DEBUG
    3375                 :            :         atomic_long_set(&n->nr_slabs, 0);
    3376                 :            :         atomic_long_set(&n->total_objects, 0);
    3377                 :          3 :         INIT_LIST_HEAD(&n->full);
    3378                 :            : #endif
    3379                 :            : }
    3380                 :            : /*
                         :            :  * Allocate and initialise the per cpu kmem_cache_cpu area of @s.
                         :            :  *
                         :            :  * The BUILD_BUG_ON checks at compile time that PERCPU_DYNAMIC_EARLY_SIZE
                         :            :  * is at least KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu).
                         :            :  *
                         :            :  * Return: 1 on success, 0 if __alloc_percpu() fails.
                         :            :  */
    3381                 :          3 : static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
    3382                 :            : {
    3383                 :            :         BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
    3384                 :            :                         KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
    3385                 :            : 
    3386                 :            :         /*
    3387                 :            :          * Must align to double word boundary for the double cmpxchg
    3388                 :            :          * instructions to work; see __pcpu_double_call_return_bool().
    3389                 :            :          */
    3390                 :          3 :         s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
    3391                 :            :                                      2 * sizeof(void *));
    3392                 :            : 
    3393                 :          3 :         if (!s->cpu_slab)
    3394                 :            :                 return 0;
    3395                 :            : 
    3396                 :          3 :         init_kmem_cache_cpus(s);
    3397                 :            : 
    3398                 :          3 :         return 1;
    3399                 :            : }
    3400                 :            : /*
                         :            :  * Cache that struct kmem_cache_node objects are allocated from and freed
                         :            :  * to; see init_kmem_cache_nodes() and free_kmem_cache_nodes().
                         :            :  */
    3401                 :            : static struct kmem_cache *kmem_cache_node;
    3402                 :            : 
    3403                 :            : /*
    3404                 :            :  * No kmalloc_node yet so do it by hand. We know that this is the first
    3405                 :            :  * slab on the node for this slabcache. There are no concurrent accesses
    3406                 :            :  * possible.
    3407                 :            :  *
    3408                 :            :  * Note that this function only works on the kmem_cache_node cache itself,
    3409                 :            :  * i.e. when allocating a node structure for kmem_cache_node. This is used
    3410                 :            :  * for bootstrapping memory on a fresh node that has no slab structures yet.
                         :            :  *
                         :            :  * A new slab is requested for @node, its first free object becomes the
                         :            :  * node structure, and the slab is put on that structure's partial list.
                         :            :  * The function BUGs if the cache's objects are smaller than
                         :            :  * struct kmem_cache_node, if no slab could be allocated, or if the slab
                         :            :  * has no free object. A slab that ended up on another node is only
                         :            :  * reported with pr_err() and then used anyway.
    3411                 :            :  */
    3412                 :          3 : static void early_kmem_cache_node_alloc(int node)
    3413                 :            : {
    3414                 :            :         struct page *page;
    3415                 :            :         struct kmem_cache_node *n;
    3416                 :            : 
    3417                 :          3 :         BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
    3418                 :            : 
    3419                 :          3 :         page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
    3420                 :            : 
    3421                 :          3 :         BUG_ON(!page);
    3422                 :          3 :         if (page_to_nid(page) != node) {
    3423                 :          0 :                 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
    3424                 :          0 :                 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
    3425                 :            :         }
    3426                 :            : 
    3427                 :          3 :         n = page->freelist;     /* take the first free object by hand */
    3428                 :          3 :         BUG_ON(!n);
    3429                 :            : #ifdef CONFIG_SLUB_DEBUG
    3430                 :          3 :         init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
    3431                 :          3 :         init_tracking(kmem_cache_node, n);
    3432                 :            : #endif
    3433                 :            :         n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
    3434                 :            :                       GFP_KERNEL);
    3435                 :          3 :         page->freelist = get_freepointer(kmem_cache_node, n);   /* unlink the object from the slab's freelist */
    3436                 :          3 :         page->inuse = 1;
    3437                 :          3 :         page->frozen = 0;
    3438                 :          3 :         kmem_cache_node->node[node] = n;
    3439                 :            :         init_kmem_cache_node(n);
    3440                 :          3 :         inc_slabs_node(kmem_cache_node, node, page->objects);
    3441                 :            : 
    3442                 :            :         /*
    3443                 :            :          * No locks need to be taken here as it has just been
    3444                 :            :          * initialized and there is no concurrent access.
    3445                 :            :          */
    3446                 :            :         __add_partial(n, page, DEACTIVATE_TO_HEAD);
    3447                 :          3 : }
    3448                 :            : /*
                         :            :  * Release every per node structure of @s: each entry visited by
                         :            :  * for_each_kmem_cache_node() is cleared in s->node[] and its structure
                         :            :  * is freed back to the kmem_cache_node cache.
                         :            :  */
    3449                 :          0 : static void free_kmem_cache_nodes(struct kmem_cache *s)
    3450                 :            : {
    3451                 :            :         int node;
    3452                 :            :         struct kmem_cache_node *n;
    3453                 :            : 
    3454                 :          0 :         for_each_kmem_cache_node(s, node, n) {
    3455                 :          0 :                 s->node[node] = NULL;
    3456                 :          0 :                 kmem_cache_free(kmem_cache_node, n);
    3457                 :            :         }
    3458                 :          0 : }
    3459                 :            : /*
                         :            :  * Release the auxiliary allocations of @s, in this order: the random
                         :            :  * sequence (cache_random_seq_destroy()), the per cpu area s->cpu_slab
                         :            :  * and the per node structures.
                         :            :  */
    3460                 :          0 : void __kmem_cache_release(struct kmem_cache *s)
    3461                 :            : {
    3462                 :            :         cache_random_seq_destroy(s);
    3463                 :          0 :         free_percpu(s->cpu_slab);
    3464                 :          0 :         free_kmem_cache_nodes(s);
    3465                 :          0 : }
    3466                 :            : /*
                         :            :  * Set up a kmem_cache_node for every node in the N_NORMAL_MEMORY state.
                         :            :  *
                         :            :  * While slab_state is DOWN the structure is created by
                         :            :  * early_kmem_cache_node_alloc(), which stores it in
                         :            :  * kmem_cache_node->node[] rather than in s->node[].
                         :            :  * NOTE(review): this presumably means @s is kmem_cache_node itself in
                         :            :  * that state - confirm against the bootstrap caller.
                         :            :  * Otherwise the structure is allocated from the kmem_cache_node cache
                         :            :  * on the node in question.
                         :            :  *
                         :            :  * Return: 1 on success; 0 if an allocation failed, in which case the
                         :            :  * structures already set up for @s have been freed again.
                         :            :  */
    3467                 :          3 : static int init_kmem_cache_nodes(struct kmem_cache *s)
    3468                 :            : {
    3469                 :            :         int node;
    3470                 :            : 
    3471                 :          3 :         for_each_node_state(node, N_NORMAL_MEMORY) {
    3472                 :            :                 struct kmem_cache_node *n;
    3473                 :            : 
    3474                 :          3 :                 if (slab_state == DOWN) {
    3475                 :          3 :                         early_kmem_cache_node_alloc(node);
    3476                 :          3 :                         continue;
    3477                 :            :                 }
    3478                 :          3 :                 n = kmem_cache_alloc_node(kmem_cache_node,
    3479                 :            :                                                 GFP_KERNEL, node);
    3480                 :            : 
    3481                 :          3 :                 if (!n) {
    3482                 :          0 :                         free_kmem_cache_nodes(s);
    3483                 :          0 :                         return 0;
    3484                 :            :                 }
    3485                 :            : 
    3486                 :            :                 init_kmem_cache_node(n);
    3487                 :          3 :                 s->node[node] = n;
    3488                 :            :         }
    3489                 :            :         return 1;
    3490                 :            : }
    3491                 :            : /*
                         :            :  * Store @min in s->min_partial after clamping it to the range
                         :            :  * MIN_PARTIAL..MAX_PARTIAL.
                         :            :  */
    3492                 :            : static void set_min_partial(struct kmem_cache *s, unsigned long min)
    3493                 :            : {
    3494                 :          3 :         if (min < MIN_PARTIAL)
    3495                 :            :                 min = MIN_PARTIAL;
    3496                 :          3 :         else if (min > MAX_PARTIAL)
    3497                 :            :                 min = MAX_PARTIAL;
    3498                 :          3 :         s->min_partial = min;
    3499                 :            : }
    3500                 :            : /*
                         :            :  * Choose s->cpu_partial from the object size of @s: 0 when
                         :            :  * kmem_cache_has_cpu_partial() says the cache has no cpu partial lists,
                         :            :  * otherwise 2 for sizes of at least PAGE_SIZE, 6 from 1024 bytes,
                         :            :  * 13 from 256 bytes and 30 for anything smaller. Without
                         :            :  * CONFIG_SLUB_CPU_PARTIAL the function does nothing.
                         :            :  */
    3501                 :            : static void set_cpu_partial(struct kmem_cache *s)
    3502                 :            : {
    3503                 :            : #ifdef CONFIG_SLUB_CPU_PARTIAL
    3504                 :            :         /*
    3505                 :            :          * cpu_partial determines the maximum number of objects kept in the
    3506                 :            :          * per cpu partial lists of a processor.
    3507                 :            :          *
    3508                 :            :          * Per cpu partial lists mainly contain slabs that just have one
    3509                 :            :          * object freed. If they are used for allocation then they can be
    3510                 :            :          * filled up again with minimal effort. The slab will never hit the
    3511                 :            :          * per node partial lists and therefore no locking will be required.
    3512                 :            :          *
    3513                 :            :          * This setting also determines
    3514                 :            :          *
    3515                 :            :          * A) The number of objects from per cpu partial slabs dumped to the
    3516                 :            :          *    per node list when we reach the limit.
    3517                 :            :          * B) The number of objects in cpu partial slabs to extract from the
    3518                 :            :          *    per node list when we run out of per cpu objects. We only fetch
    3519                 :            :          *    50% to keep some capacity around for frees.
    3520                 :            :          */
    3521                 :          3 :         if (!kmem_cache_has_cpu_partial(s))
    3522                 :          0 :                 s->cpu_partial = 0;
    3523                 :          3 :         else if (s->size >= PAGE_SIZE)
    3524                 :          3 :                 s->cpu_partial = 2;
    3525                 :          3 :         else if (s->size >= 1024)
    3526                 :          3 :                 s->cpu_partial = 6;
    3527                 :          3 :         else if (s->size >= 256)
    3528                 :          3 :                 s->cpu_partial = 13;
    3529                 :            :         else
    3530                 :          3 :                 s->cpu_partial = 30;
    3531                 :            : #endif
    3532                 :            : }
    3533                 :            : 
    3534                 :            : /*
    3535                 :            :  * calculate_sizes() determines the order and the distribution of data within
    3536                 :            :  * a slab object.
    3537                 :            :  */
    3538                 :          3 : static int calculate_sizes(struct kmem_cache *s, int forced_order)
    3539                 :            : {
    3540                 :          3 :         slab_flags_t flags = s->flags;
    3541                 :          3 :         unsigned int size = s->object_size;
    3542                 :            :         unsigned int order;
    3543                 :            : 
    3544                 :            :         /*
    3545                 :            :          * Round up object size to the next word boundary. We can only
    3546                 :            :          * place the free pointer at word boundaries and this determines
    3547                 :            :          * the possible location of the free pointer.
    3548                 :            :          */
    3549                 :          3 :         size = ALIGN(size, sizeof(void *));
    3550                 :            : 
    3551                 :            : #ifdef CONFIG_SLUB_DEBUG
    3552                 :            :         /*
    3553                 :            :          * Determine if we can poison the object itself. If the user of
    3554                 :            :          * the slab may touch the object after free or before allocation
    3555                 :            :          * then we should never poison the object itself.
    3556                 :            :          */
    3557                 :          3 :         if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
    3558                 :          0 :                         !s->ctor)
    3559                 :          0 :                 s->flags |= __OBJECT_POISON;
    3560                 :            :         else
    3561                 :          3 :                 s->flags &= ~__OBJECT_POISON;
    3562                 :            : 
    3563                 :            : 
    3564                 :            :         /*
    3565                 :            :          * If we are Redzoning then check if there is some space between the
    3566                 :            :          * end of the object and the free pointer. If not then add an
    3567                 :            :          * additional word to have some bytes to store Redzone information.
    3568                 :            :          */
    3569                 :          3 :         if ((flags & SLAB_RED_ZONE) && size == s->object_size)
    3570                 :          0 :                 size += sizeof(void *);
    3571                 :            : #endif
    3572                 :            : 
    3573                 :            :         /*
    3574                 :            :          * With that we have determined the number of bytes in actual use
    3575                 :            :          * by the object. This is the potential offset to the free pointer.
    3576                 :            :          */
    3577                 :          3 :         s->inuse = size;
    3578                 :            : 
    3579                 :          3 :         if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
    3580                 :          3 :                 s->ctor)) {
    3581                 :            :                 /*
    3582                 :            :                  * Relocate free pointer after the object if it is not
    3583                 :            :                  * permitted to overwrite the first word of the object on
    3584                 :            :                  * kmem_cache_free.
    3585                 :            :                  *
    3586                 :            :                  * This is the case if we do RCU, have a constructor, or
    3587                 :            :                  * are poisoning the objects.
    3588                 :            :                  */
    3589                 :          3 :                 s->offset = size;
    3590                 :          3 :                 size += sizeof(void *);
    3591                 :            :         }
    3592                 :            : 
    3593                 :            : #ifdef CONFIG_SLUB_DEBUG
    3594                 :          3 :         if (flags & SLAB_STORE_USER)
    3595                 :            :                 /*
    3596                 :            :                  * Need to store information about allocs and frees after
    3597                 :            :                  * the object.
    3598                 :            :                  */
    3599                 :          0 :                 size += 2 * sizeof(struct track);
    3600                 :            : #endif
    3601                 :            : 
    3602                 :            :         kasan_cache_create(s, &size, &s->flags);
    3603                 :            : #ifdef CONFIG_SLUB_DEBUG
    3604                 :          3 :         if (flags & SLAB_RED_ZONE) {
    3605                 :            :                 /*
    3606                 :            :                  * Add some empty padding so that we can catch
    3607                 :            :                  * overwrites from earlier objects rather than let
    3608                 :            :                  * tracking information or the free pointer be
    3609                 :            :                  * corrupted if a user writes before the start
    3610                 :            :                  * of the object.
    3611                 :            :                  */
    3612                 :          0 :                 size += sizeof(void *);
    3613                 :            : 
    3614                 :          0 :                 s->red_left_pad = sizeof(void *);
    3615                 :          0 :                 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
    3616                 :          0 :                 size += s->red_left_pad;
    3617                 :            :         }
    3618                 :            : #endif
    3619                 :            : 
    3620                 :            :         /*
    3621                 :            :          * SLUB stores one object immediately after another beginning from
    3622                 :            :          * offset 0. In order to align the objects we have to simply size
    3623                 :            :          * each object to conform to the alignment.
    3624                 :            :          */
    3625                 :          3 :         size = ALIGN(size, s->align);
    3626                 :          3 :         s->size = size;
    3627                 :          3 :         if (forced_order >= 0)
    3628                 :          0 :                 order = forced_order;
    3629                 :            :         else
    3630                 :          3 :                 order = calculate_order(size);
    3631                 :            : 
    3632                 :          3 :         if ((int)order < 0)
    3633                 :            :                 return 0;
    3634                 :            : 
    3635                 :          3 :         s->allocflags = 0;
    3636                 :          3 :         if (order)
    3637                 :          3 :                 s->allocflags |= __GFP_COMP;
    3638                 :            : 
    3639                 :          3 :         if (s->flags & SLAB_CACHE_DMA)
    3640                 :          0 :                 s->allocflags |= GFP_DMA;
    3641                 :            : 
    3642                 :          3 :         if (s->flags & SLAB_CACHE_DMA32)
    3643                 :          0 :                 s->allocflags |= GFP_DMA32;
    3644                 :            : 
    3645                 :          3 :         if (s->flags & SLAB_RECLAIM_ACCOUNT)
    3646                 :          3 :                 s->allocflags |= __GFP_RECLAIMABLE;
    3647                 :            : 
    3648                 :            :         /*
    3649                 :            :          * Determine the number of objects per slab
    3650                 :            :          */
    3651                 :          3 :         s->oo = oo_make(order, size);
    3652                 :          3 :         s->min = oo_make(get_order(size), size);
    3653                 :          3 :         if (oo_objects(s->oo) > oo_objects(s->max))
    3654                 :          3 :                 s->max = s->oo;
    3655                 :            : 
    3656                 :          3 :         return !!oo_objects(s->oo);
    3657                 :            : }
    3658                 :            : 
    3659                 :          3 : static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
    3660                 :            : {
    3661                 :          3 :         s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
    3662                 :            : #ifdef CONFIG_SLAB_FREELIST_HARDENED
    3663                 :            :         s->random = get_random_long();
    3664                 :            : #endif
    3665                 :            : 
    3666                 :          3 :         if (!calculate_sizes(s, -1))
    3667                 :            :                 goto error;
    3668                 :          3 :         if (disable_higher_order_debug) {
    3669                 :            :                 /*
    3670                 :            :                  * Disable debugging flags that store metadata if the min slab
    3671                 :            :                  * order increased.
    3672                 :            :                  */
    3673                 :          0 :                 if (get_order(s->size) > get_order(s->object_size)) {
    3674                 :          0 :                         s->flags &= ~DEBUG_METADATA_FLAGS;
    3675                 :          0 :                         s->offset = 0;
    3676                 :          0 :                         if (!calculate_sizes(s, -1))
    3677                 :            :                                 goto error;
    3678                 :            :                 }
    3679                 :            :         }
    3680                 :            : 
    3681                 :            : #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    3682                 :            :     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
    3683                 :            :         if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
    3684                 :            :                 /* Enable fast mode */
    3685                 :            :                 s->flags |= __CMPXCHG_DOUBLE;
    3686                 :            : #endif
    3687                 :            : 
    3688                 :            :         /*
    3689                 :            :          * The larger the object size is, the more pages we want on the partial
    3690                 :            :          * list to avoid pounding the page allocator excessively.
    3691                 :            :          */
    3692                 :          3 :         set_min_partial(s, ilog2(s->size) / 2);
    3693                 :            : 
    3694                 :            :         set_cpu_partial(s);
    3695                 :            : 
    3696                 :            : #ifdef CONFIG_NUMA
    3697                 :            :         s->remote_node_defrag_ratio = 1000;
    3698                 :            : #endif
    3699                 :            : 
    3700                 :            :         /* Initialize the pre-computed randomized freelist if slab is up */
    3701                 :            :         if (slab_state >= UP) {
    3702                 :            :                 if (init_cache_random_seq(s))
    3703                 :            :                         goto error;
    3704                 :            :         }
    3705                 :            : 
    3706                 :          3 :         if (!init_kmem_cache_nodes(s))
    3707                 :            :                 goto error;
    3708                 :            : 
    3709                 :          3 :         if (alloc_kmem_cache_cpus(s))
    3710                 :            :                 return 0;
    3711                 :            : 
    3712                 :          0 :         free_kmem_cache_nodes(s);
    3713                 :            : error:
    3714                 :            :         return -EINVAL;
    3715                 :            : }
    3716                 :            : 
    3717                 :          0 : static void list_slab_objects(struct kmem_cache *s, struct page *page,
    3718                 :            :                                                         const char *text)
    3719                 :            : {
    3720                 :            : #ifdef CONFIG_SLUB_DEBUG
    3721                 :            :         void *addr = page_address(page);
    3722                 :            :         void *p;
    3723                 :          0 :         unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
    3724                 :          0 :         if (!map)
    3725                 :          0 :                 return;
    3726                 :          0 :         slab_err(s, page, text, s->name);
    3727                 :            :         slab_lock(page);
    3728                 :            : 
    3729                 :          0 :         get_map(s, page, map);
    3730                 :          0 :         for_each_object(p, s, addr, page->objects) {
    3731                 :            : 
    3732                 :          0 :                 if (!test_bit(slab_index(p, s, addr), map)) {
    3733                 :          0 :                         pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
    3734                 :          0 :                         print_tracking(s, p);
    3735                 :            :                 }
    3736                 :            :         }
    3737                 :            :         slab_unlock(page);
    3738                 :          0 :         bitmap_free(map);
    3739                 :            : #endif
    3740                 :            : }
    3741                 :            : 
    3742                 :            : /*
    3743                 :            :  * Attempt to free all partial slabs on a node.
    3744                 :            :  * This is called from __kmem_cache_shutdown(). We must take list_lock
    3745                 :            :  * because a sysfs file might still access the partial list after shutdown.
    3746                 :            :  */
    3747                 :          0 : static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
    3748                 :            : {
    3749                 :          0 :         LIST_HEAD(discard);
    3750                 :            :         struct page *page, *h;
    3751                 :            : 
    3752                 :          0 :         BUG_ON(irqs_disabled());
    3753                 :            :         spin_lock_irq(&n->list_lock);
    3754                 :          0 :         list_for_each_entry_safe(page, h, &n->partial, slab_list) {
    3755                 :          0 :                 if (!page->inuse) {
    3756                 :            :                         remove_partial(n, page);
    3757                 :            :                         list_add(&page->slab_list, &discard);
    3758                 :            :                 } else {
    3759                 :          0 :                         list_slab_objects(s, page,
    3760                 :            :                         "Objects remaining in %s on __kmem_cache_shutdown()");
    3761                 :            :                 }
    3762                 :            :         }
    3763                 :            :         spin_unlock_irq(&n->list_lock);
    3764                 :            : 
    3765                 :          0 :         list_for_each_entry_safe(page, h, &discard, slab_list)
    3766                 :          0 :                 discard_slab(s, page);
    3767                 :          0 : }
    3768                 :            : 
    3769                 :          0 : bool __kmem_cache_empty(struct kmem_cache *s)
    3770                 :            : {
    3771                 :            :         int node;
    3772                 :            :         struct kmem_cache_node *n;
    3773                 :            : 
    3774                 :          0 :         for_each_kmem_cache_node(s, node, n)
    3775                 :          0 :                 if (n->nr_partial || slabs_node(s, node))
    3776                 :            :                         return false;
    3777                 :            :         return true;
    3778                 :            : }
    3779                 :            : 
    3780                 :            : /*
    3781                 :            :  * Release all resources used by a slab cache.
    3782                 :            :  */
    3783                 :          0 : int __kmem_cache_shutdown(struct kmem_cache *s)
    3784                 :            : {
    3785                 :            :         int node;
    3786                 :            :         struct kmem_cache_node *n;
    3787                 :            : 
    3788                 :            :         flush_all(s);
    3789                 :            :         /* Attempt to free all objects */
    3790                 :          0 :         for_each_kmem_cache_node(s, node, n) {
    3791                 :          0 :                 free_partial(s, n);
    3792                 :          0 :                 if (n->nr_partial || slabs_node(s, node))
    3793                 :            :                         return 1;
    3794                 :            :         }
    3795                 :          0 :         sysfs_slab_remove(s);
    3796                 :          0 :         return 0;
    3797                 :            : }
    3798                 :            : 
    3799                 :            : /********************************************************************
    3800                 :            :  *              Kmalloc subsystem
    3801                 :            :  *******************************************************************/
    3802                 :            : 
    3803                 :          0 : static int __init setup_slub_min_order(char *str)
    3804                 :            : {
    3805                 :          0 :         get_option(&str, (int *)&slub_min_order);
    3806                 :            : 
    3807                 :          0 :         return 1;
    3808                 :            : }
    3809                 :            : 
    3810                 :            : __setup("slub_min_order=", setup_slub_min_order);
    3811                 :            : 
    3812                 :          0 : static int __init setup_slub_max_order(char *str)
    3813                 :            : {
    3814                 :          0 :         get_option(&str, (int *)&slub_max_order);
    3815                 :          0 :         slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
    3816                 :            : 
    3817                 :          0 :         return 1;
    3818                 :            : }
    3819                 :            : 
    3820                 :            : __setup("slub_max_order=", setup_slub_max_order);
    3821                 :            : 
    3822                 :          0 : static int __init setup_slub_min_objects(char *str)
    3823                 :            : {
    3824                 :          0 :         get_option(&str, (int *)&slub_min_objects);
    3825                 :            : 
    3826                 :          0 :         return 1;
    3827                 :            : }
    3828                 :            : 
    3829                 :            : __setup("slub_min_objects=", setup_slub_min_objects);
    3830                 :            : 
    3831                 :          3 : void *__kmalloc(size_t size, gfp_t flags)
    3832                 :            : {
    3833                 :            :         struct kmem_cache *s;
    3834                 :            :         void *ret;
    3835                 :            : 
    3836                 :          3 :         if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
    3837                 :          3 :                 return kmalloc_large(size, flags);
    3838                 :            : 
    3839                 :          3 :         s = kmalloc_slab(size, flags);
    3840                 :            : 
    3841                 :          3 :         if (unlikely(ZERO_OR_NULL_PTR(s)))
    3842                 :            :                 return s;
    3843                 :            : 
    3844                 :          3 :         ret = slab_alloc(s, flags, _RET_IP_);
    3845                 :            : 
    3846                 :          3 :         trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
    3847                 :            : 
    3848                 :            :         ret = kasan_kmalloc(s, ret, size, flags);
    3849                 :            : 
    3850                 :          3 :         return ret;
    3851                 :            : }
    3852                 :            : EXPORT_SYMBOL(__kmalloc);
    3853                 :            : 
    3854                 :            : #ifdef CONFIG_NUMA
    3855                 :            : static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
    3856                 :            : {
    3857                 :            :         struct page *page;
    3858                 :            :         void *ptr = NULL;
    3859                 :            :         unsigned int order = get_order(size);
    3860                 :            : 
    3861                 :            :         flags |= __GFP_COMP;
    3862                 :            :         page = alloc_pages_node(node, flags, order);
    3863                 :            :         if (page) {
    3864                 :            :                 ptr = page_address(page);
    3865                 :            :                 mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
    3866                 :            :                                     1 << order);
    3867                 :            :         }
    3868                 :            : 
    3869                 :            :         return kmalloc_large_node_hook(ptr, size, flags);
    3870                 :            : }
    3871                 :            : 
    3872                 :            : void *__kmalloc_node(size_t size, gfp_t flags, int node)
    3873                 :            : {
    3874                 :            :         struct kmem_cache *s;
    3875                 :            :         void *ret;
    3876                 :            : 
    3877                 :            :         if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
    3878                 :            :                 ret = kmalloc_large_node(size, flags, node);
    3879                 :            : 
    3880                 :            :                 trace_kmalloc_node(_RET_IP_, ret,
    3881                 :            :                                    size, PAGE_SIZE << get_order(size),
    3882                 :            :                                    flags, node);
    3883                 :            : 
    3884                 :            :                 return ret;
    3885                 :            :         }
    3886                 :            : 
    3887                 :            :         s = kmalloc_slab(size, flags);
    3888                 :            : 
    3889                 :            :         if (unlikely(ZERO_OR_NULL_PTR(s)))
    3890                 :            :                 return s;
    3891                 :            : 
    3892                 :            :         ret = slab_alloc_node(s, flags, node, _RET_IP_);
    3893                 :            : 
    3894                 :            :         trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
    3895                 :            : 
    3896                 :            :         ret = kasan_kmalloc(s, ret, size, flags);
    3897                 :            : 
    3898                 :            :         return ret;
    3899                 :            : }
    3900                 :            : EXPORT_SYMBOL(__kmalloc_node);
    3901                 :            : #endif  /* CONFIG_NUMA */
    3902                 :            : 
    3903                 :            : #ifdef CONFIG_HARDENED_USERCOPY
    3904                 :            : /*
    3905                 :            :  * Rejects incorrectly sized objects and objects that are to be copied
    3906                 :            :  * to/from userspace but do not fall entirely within the containing slab
    3907                 :            :  * cache's usercopy region.
    3908                 :            :  *
    3909                 :            :  * Returns nothing. A passing check simply returns; a failing one is
    3910                 :            :  * reported via usercopy_abort() (or usercopy_warn() in the fallback case).
    3911                 :            :  */
    3912                 :            : void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
    3913                 :            :                          bool to_user)
    3914                 :            : {
    3915                 :            :         struct kmem_cache *s;
    3916                 :            :         unsigned int offset;
    3917                 :            :         size_t object_size;
    3918                 :            : 
    3919                 :            :         ptr = kasan_reset_tag(ptr);
    3920                 :            : 
    3921                 :            :         /* Find object and usable object size. */
    3922                 :            :         s = page->slab_cache;
    3923                 :            : 
    3924                 :            :         /* Reject impossible pointers. */
    3925                 :            :         if (ptr < page_address(page))
    3926                 :            :                 usercopy_abort("SLUB object not in SLUB page?!", NULL,
    3927                 :            :                                to_user, 0, n);
    3928                 :            : 
    3929                 :            :         /* Find offset within object. */
    3930                 :            :         offset = (ptr - page_address(page)) % s->size;
    3931                 :            : 
    3932                 :            :         /* Adjust for redzone and reject if within the redzone. */
    3933                 :            :         if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
    3934                 :            :                 if (offset < s->red_left_pad)
    3935                 :            :                         usercopy_abort("SLUB object in left red zone",
    3936                 :            :                                        s->name, to_user, offset, n);
    3937                 :            :                 offset -= s->red_left_pad;
    3938                 :            :         }
    3939                 :            : 
    3940                 :            :         /* Allow address range falling entirely within usercopy region. */
    3941                 :            :         if (offset >= s->useroffset &&
    3942                 :            :             offset - s->useroffset <= s->usersize &&
    3943                 :            :             n <= s->useroffset - offset + s->usersize)
    3944                 :            :                 return;
    3945                 :            : 
    3946                 :            :         /*
    3947                 :            :          * If the copy is still within the allocated object, produce
    3948                 :            :          * a warning instead of rejecting the copy. This is intended
    3949                 :            :          * to be a temporary method to find any missing usercopy
    3950                 :            :          * whitelists.
    3951                 :            :          */
    3952                 :            :         object_size = slab_ksize(s);
    3953                 :            :         if (usercopy_fallback &&
    3954                 :            :             offset <= object_size && n <= object_size - offset) {
    3955                 :            :                 usercopy_warn("SLUB object", s->name, to_user, offset, n);
    3956                 :            :                 return;
    3957                 :            :         }
    3958                 :            : 
    3959                 :            :         usercopy_abort("SLUB object", s->name, to_user, offset, n);
    3960                 :            : }
    3961                 :            : #endif /* CONFIG_HARDENED_USERCOPY */
    3962                 :            : 
    3963                 :          3 : size_t __ksize(const void *object)
    3964                 :            : {
    3965                 :            :         struct page *page;
    3966                 :            : 
    3967                 :          3 :         if (unlikely(object == ZERO_SIZE_PTR))
    3968                 :            :                 return 0;
    3969                 :            : 
    3970                 :            :         page = virt_to_head_page(object);
    3971                 :            : 
    3972                 :          3 :         if (unlikely(!PageSlab(page))) {
    3973                 :          3 :                 WARN_ON(!PageCompound(page));
    3974                 :          3 :                 return page_size(page);
    3975                 :            :         }
    3976                 :            : 
    3977                 :          3 :         return slab_ksize(page->slab_cache);
    3978                 :            : }
    3979                 :            : EXPORT_SYMBOL(__ksize);
    3980                 :            : 
    3981                 :          3 : void kfree(const void *x)
    3982                 :            : {
    3983                 :            :         struct page *page;
    3984                 :            :         void *object = (void *)x;
    3985                 :            : 
    3986                 :          3 :         trace_kfree(_RET_IP_, x);
    3987                 :            : 
    3988                 :          3 :         if (unlikely(ZERO_OR_NULL_PTR(x)))
    3989                 :            :                 return;
    3990                 :            : 
    3991                 :            :         page = virt_to_head_page(x);
    3992                 :          3 :         if (unlikely(!PageSlab(page))) {
    3993                 :            :                 unsigned int order = compound_order(page);
    3994                 :            : 
    3995                 :          3 :                 BUG_ON(!PageCompound(page));
    3996                 :            :                 kfree_hook(object);
    3997                 :          3 :                 mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
    3998                 :          3 :                                     -(1 << order));
    3999                 :          3 :                 __free_pages(page, order);
    4000                 :          3 :                 return;
    4001                 :            :         }
    4002                 :          3 :         slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
    4003                 :            : }
    4004                 :            : EXPORT_SYMBOL(kfree);
    4005                 :            : 
    4006                 :            : #define SHRINK_PROMOTE_MAX 32
    4007                 :            : 
    4008                 :            : /*
    4009                 :            :  * kmem_cache_shrink discards empty slabs and promotes the slabs filled
    4010                 :            :  * up most to the head of the partial lists. New allocations will then
    4011                 :            :  * fill those up and thus they can be removed from the partial lists.
    4012                 :            :  *
    4013                 :            :  * The slabs with the least items are placed last. This results in them
    4014                 :            :  * being allocated from last increasing the chance that the last objects
    4015                 :            :  * are freed in them.
    4016                 :            :  */
    4017                 :          0 : int __kmem_cache_shrink(struct kmem_cache *s)
    4018                 :            : {
    4019                 :            :         int node;
    4020                 :            :         int i;
    4021                 :            :         struct kmem_cache_node *n;
    4022                 :            :         struct page *page;
    4023                 :            :         struct page *t;
    4024                 :            :         struct list_head discard;
    4025                 :            :         struct list_head promote[SHRINK_PROMOTE_MAX];
    4026                 :            :         unsigned long flags;
    4027                 :            :         int ret = 0;
    4028                 :            : 
    4029                 :            :         flush_all(s);
    4030                 :          0 :         for_each_kmem_cache_node(s, node, n) {
    4031                 :            :                 INIT_LIST_HEAD(&discard);
    4032                 :          0 :                 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
    4033                 :          0 :                         INIT_LIST_HEAD(promote + i);
    4034                 :            : 
    4035                 :          0 :                 spin_lock_irqsave(&n->list_lock, flags);
    4036                 :            : 
    4037                 :            :                 /*
    4038                 :            :                  * Build lists of slabs to discard or promote.
    4039                 :            :                  *
    4040                 :            :                  * Note that concurrent frees may occur while we hold the
    4041                 :            :                  * list_lock. page->inuse here is the upper limit.
    4042                 :            :                  */
    4043                 :          0 :                 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
    4044                 :          0 :                         int free = page->objects - page->inuse;
    4045                 :            : 
    4046                 :            :                         /* Do not reread page->inuse */
    4047                 :          0 :                         barrier();
    4048                 :            : 
    4049                 :            :                         /* We do not keep full slabs on the list */
    4050                 :          0 :                         BUG_ON(free <= 0);
    4051                 :            : 
    4052                 :          0 :                         if (free == page->objects) {
    4053                 :            :                                 list_move(&page->slab_list, &discard);
    4054                 :          0 :                                 n->nr_partial--;
    4055                 :          0 :                         } else if (free <= SHRINK_PROMOTE_MAX)
    4056                 :          0 :                                 list_move(&page->slab_list, promote + free - 1);
    4057                 :            :                 }
    4058                 :            : 
    4059                 :            :                 /*
    4060                 :            :                  * Promote the slabs filled up most to the head of the
    4061                 :            :                  * partial list.
    4062                 :            :                  */
    4063                 :          0 :                 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
    4064                 :          0 :                         list_splice(promote + i, &n->partial);
    4065                 :            : 
    4066                 :            :                 spin_unlock_irqrestore(&n->list_lock, flags);
    4067                 :            : 
    4068                 :            :                 /* Release empty slabs */
    4069                 :          0 :                 list_for_each_entry_safe(page, t, &discard, slab_list)
    4070                 :          0 :                         discard_slab(s, page);
    4071                 :            : 
    4072                 :          0 :                 if (slabs_node(s, node))
    4073                 :            :                         ret = 1;
    4074                 :            :         }
    4075                 :            : 
    4076                 :          0 :         return ret;
    4077                 :            : }
    4078                 :            : 
    4079                 :            : #ifdef CONFIG_MEMCG
    4080                 :          0 : void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
    4081                 :            : {
    4082                 :            :         /*
    4083                 :            :          * Called with all the locks held after a sched RCU grace period.
    4084                 :            :          * Even if @s becomes empty after shrinking, we can't know that @s
    4085                 :            :          * doesn't have allocations already in-flight and thus can't
    4086                 :            :          * destroy @s until the associated memcg is released.
    4087                 :            :          *
    4088                 :            :          * However, let's remove the sysfs files for empty caches here.
    4089                 :            :          * Each cache has a lot of interface files which aren't
    4090                 :            :          * particularly useful for empty draining caches; otherwise, we can
    4091                 :            :          * easily end up with millions of unnecessary sysfs files on
    4092                 :            :          * systems which have a lot of memory and transient cgroups.
    4093                 :            :          */
    4094                 :          0 :         if (!__kmem_cache_shrink(s))
    4095                 :          0 :                 sysfs_slab_remove(s);
    4096                 :          0 : }
    4097                 :            : 
    4098                 :          0 : void __kmemcg_cache_deactivate(struct kmem_cache *s)
    4099                 :            : {
    4100                 :            :         /*
    4101                 :            :          * Disable empty slabs caching. Used to avoid pinning offline
    4102                 :            :          * memory cgroups by kmem pages that can be freed.
    4103                 :            :          */
    4104                 :          0 :         slub_set_cpu_partial(s, 0);
    4105                 :          0 :         s->min_partial = 0;
    4106                 :          0 : }
    4107                 :            : #endif  /* CONFIG_MEMCG */
    4108                 :            : 
    4109                 :            : static int slab_mem_going_offline_callback(void *arg)
    4110                 :            : {
    4111                 :            :         struct kmem_cache *s;
    4112                 :            : 
    4113                 :            :         mutex_lock(&slab_mutex);
    4114                 :            :         list_for_each_entry(s, &slab_caches, list)
    4115                 :            :                 __kmem_cache_shrink(s);
    4116                 :            :         mutex_unlock(&slab_mutex);
    4117                 :            : 
    4118                 :            :         return 0;
    4119                 :            : }
    4120                 :            : 
    4121                 :            : static void slab_mem_offline_callback(void *arg)
    4122                 :            : {
    4123                 :            :         struct kmem_cache_node *n;
    4124                 :            :         struct kmem_cache *s;
    4125                 :            :         struct memory_notify *marg = arg;
    4126                 :            :         int offline_node;
    4127                 :            : 
    4128                 :            :         offline_node = marg->status_change_nid_normal;
    4129                 :            : 
    4130                 :            :         /*
    4131                 :            :          * If the node still has available memory. we need kmem_cache_node
    4132                 :            :          * for it yet.
    4133                 :            :          */
    4134                 :            :         if (offline_node < 0)
    4135                 :            :                 return;
    4136                 :            : 
    4137                 :            :         mutex_lock(&slab_mutex);
    4138                 :            :         list_for_each_entry(s, &slab_caches, list) {
    4139                 :            :                 n = get_node(s, offline_node);
    4140                 :            :                 if (n) {
    4141                 :            :                         /*
    4142                 :            :                          * if n->nr_slabs > 0, slabs still exist on the node
    4143                 :            :                          * that is going down. We were unable to free them,
    4144                 :            :                          * and offline_pages() function shouldn't call this
    4145                 :            :                          * callback. So, we must fail.
    4146                 :            :                          */
    4147                 :            :                         BUG_ON(slabs_node(s, offline_node));
    4148                 :            : 
    4149                 :            :                         s->node[offline_node] = NULL;
    4150                 :            :                         kmem_cache_free(kmem_cache_node, n);
    4151                 :            :                 }
    4152                 :            :         }
    4153                 :            :         mutex_unlock(&slab_mutex);
    4154                 :            : }
    4155                 :            : 
    4156                 :            : static int slab_mem_going_online_callback(void *arg)
    4157                 :            : {
    4158                 :            :         struct kmem_cache_node *n;
    4159                 :            :         struct kmem_cache *s;
    4160                 :            :         struct memory_notify *marg = arg;
    4161                 :            :         int nid = marg->status_change_nid_normal;
    4162                 :            :         int ret = 0;
    4163                 :            : 
    4164                 :            :         /*
    4165                 :            :          * If the node's memory is already available, then kmem_cache_node is
    4166                 :            :          * already created. Nothing to do.
    4167                 :            :          */
    4168                 :            :         if (nid < 0)
    4169                 :            :                 return 0;
    4170                 :            : 
    4171                 :            :         /*
    4172                 :            :          * We are bringing a node online. No memory is available yet. We must
    4173                 :            :          * allocate a kmem_cache_node structure in order to bring the node
    4174                 :            :          * online.
    4175                 :            :          */
    4176                 :            :         mutex_lock(&slab_mutex);
    4177                 :            :         list_for_each_entry(s, &slab_caches, list) {
    4178                 :            :                 /*
    4179                 :            :                  * XXX: kmem_cache_alloc_node will fallback to other nodes
    4180                 :            :                  *      since memory is not yet available from the node that
    4181                 :            :                  *      is brought up.
    4182                 :            :                  */
    4183                 :            :                 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
    4184                 :            :                 if (!n) {
    4185                 :            :                         ret = -ENOMEM;
    4186                 :            :                         goto out;
    4187                 :            :                 }
    4188                 :            :                 init_kmem_cache_node(n);
    4189                 :            :                 s->node[nid] = n;
    4190                 :            :         }
    4191                 :            : out:
    4192                 :            :         mutex_unlock(&slab_mutex);
    4193                 :            :         return ret;
    4194                 :            : }
    4195                 :            : 
    4196                 :            : static int slab_memory_callback(struct notifier_block *self,
    4197                 :            :                                 unsigned long action, void *arg)
    4198                 :            : {
    4199                 :            :         int ret = 0;
    4200                 :            : 
    4201                 :            :         switch (action) {
    4202                 :            :         case MEM_GOING_ONLINE:
    4203                 :            :                 ret = slab_mem_going_online_callback(arg);
    4204                 :            :                 break;
    4205                 :            :         case MEM_GOING_OFFLINE:
    4206                 :            :                 ret = slab_mem_going_offline_callback(arg);
    4207                 :            :                 break;
    4208                 :            :         case MEM_OFFLINE:
    4209                 :            :         case MEM_CANCEL_ONLINE:
    4210                 :            :                 slab_mem_offline_callback(arg);
    4211                 :            :                 break;
    4212                 :            :         case MEM_ONLINE:
    4213                 :            :         case MEM_CANCEL_OFFLINE:
    4214                 :            :                 break;
    4215                 :            :         }
    4216                 :            :         if (ret)
    4217                 :            :                 ret = notifier_from_errno(ret);
    4218                 :            :         else
    4219                 :            :                 ret = NOTIFY_OK;
    4220                 :            :         return ret;
    4221                 :            : }
    4222                 :            : 
    4223                 :            : static struct notifier_block slab_memory_callback_nb = {
    4224                 :            :         .notifier_call = slab_memory_callback,
    4225                 :            :         .priority = SLAB_CALLBACK_PRI,
    4226                 :            : };
    4227                 :            : 
    4228                 :            : /********************************************************************
    4229                 :            :  *                      Basic setup of slabs
    4230                 :            :  *******************************************************************/
    4231                 :            : 
    4232                 :            : /*
    4233                 :            :  * Used for early kmem_cache structures that were allocated using
    4234                 :            :  * the page allocator. Allocate them properly then fix up the pointers
    4235                 :            :  * that may be pointing to the wrong kmem_cache structure.
    4236                 :            :  */
    4237                 :            : 
    4238                 :          3 : static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
    4239                 :            : {
    4240                 :            :         int node;
    4241                 :          3 :         struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
    4242                 :            :         struct kmem_cache_node *n;
    4243                 :            : 
    4244                 :          3 :         memcpy(s, static_cache, kmem_cache->object_size);
    4245                 :            : 
    4246                 :            :         /*
    4247                 :            :          * This runs very early, and only the boot processor is supposed to be
    4248                 :            :          * up.  Even if it weren't true, IRQs are not up so we couldn't fire
    4249                 :            :          * IPIs around.
    4250                 :            :          */
    4251                 :          3 :         __flush_cpu_slab(s, smp_processor_id());
    4252                 :          3 :         for_each_kmem_cache_node(s, node, n) {
    4253                 :            :                 struct page *p;
    4254                 :            : 
    4255                 :          3 :                 list_for_each_entry(p, &n->partial, slab_list)
    4256                 :          3 :                         p->slab_cache = s;
    4257                 :            : 
    4258                 :            : #ifdef CONFIG_SLUB_DEBUG
    4259                 :          3 :                 list_for_each_entry(p, &n->full, slab_list)
    4260                 :          0 :                         p->slab_cache = s;
    4261                 :            : #endif
    4262                 :            :         }
    4263                 :          3 :         slab_init_memcg_params(s);
    4264                 :          3 :         list_add(&s->list, &slab_caches);
    4265                 :          3 :         memcg_link_cache(s, NULL);
    4266                 :          3 :         return s;
    4267                 :            : }
    4268                 :            : 
    4269                 :          3 : void __init kmem_cache_init(void)
    4270                 :            : {
    4271                 :            :         static __initdata struct kmem_cache boot_kmem_cache,
    4272                 :            :                 boot_kmem_cache_node;
    4273                 :            : 
    4274                 :            :         if (debug_guardpage_minorder())
    4275                 :            :                 slub_max_order = 0;
    4276                 :            : 
    4277                 :          3 :         kmem_cache_node = &boot_kmem_cache_node;
    4278                 :          3 :         kmem_cache = &boot_kmem_cache;
    4279                 :            : 
    4280                 :          3 :         create_boot_cache(kmem_cache_node, "kmem_cache_node",
    4281                 :            :                 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
    4282                 :            : 
    4283                 :            :         register_hotmemory_notifier(&slab_memory_callback_nb);
    4284                 :            : 
    4285                 :            :         /* Able to allocate the per node structures */
    4286                 :          3 :         slab_state = PARTIAL;
    4287                 :            : 
    4288                 :          3 :         create_boot_cache(kmem_cache, "kmem_cache",
    4289                 :            :                         offsetof(struct kmem_cache, node) +
    4290                 :            :                                 nr_node_ids * sizeof(struct kmem_cache_node *),
    4291                 :            :                        SLAB_HWCACHE_ALIGN, 0, 0);
    4292                 :            : 
    4293                 :          3 :         kmem_cache = bootstrap(&boot_kmem_cache);
    4294                 :          3 :         kmem_cache_node = bootstrap(&boot_kmem_cache_node);
    4295                 :            : 
    4296                 :            :         /* Now we can use the kmem_cache to allocate kmalloc slabs */
    4297                 :          3 :         setup_kmalloc_cache_index_table();
    4298                 :          3 :         create_kmalloc_caches(0);
    4299                 :            : 
    4300                 :            :         /* Setup random freelists for each cache */
    4301                 :            :         init_freelist_randomization();
    4302                 :            : 
    4303                 :            :         cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
    4304                 :            :                                   slub_cpu_dead);
    4305                 :            : 
    4306                 :          3 :         pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
    4307                 :            :                 cache_line_size(),
    4308                 :            :                 slub_min_order, slub_max_order, slub_min_objects,
    4309                 :            :                 nr_cpu_ids, nr_node_ids);
    4310                 :          3 : }
    4311                 :            : 
    4312                 :          3 : void __init kmem_cache_init_late(void)
    4313                 :            : {
    4314                 :          3 : }
    4315                 :            : 
    4316                 :            : struct kmem_cache *
    4317                 :          3 : __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
    4318                 :            :                    slab_flags_t flags, void (*ctor)(void *))
    4319                 :            : {
    4320                 :            :         struct kmem_cache *s, *c;
    4321                 :            : 
    4322                 :          3 :         s = find_mergeable(size, align, flags, name, ctor);
    4323                 :          3 :         if (s) {
    4324                 :          3 :                 s->refcount++;
    4325                 :            : 
    4326                 :            :                 /*
    4327                 :            :                  * Adjust the object sizes so that we clear
    4328                 :            :                  * the complete object on kzalloc.
    4329                 :            :                  */
    4330                 :          3 :                 s->object_size = max(s->object_size, size);
    4331                 :          3 :                 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
    4332                 :            : 
    4333                 :          3 :                 for_each_memcg_cache(c, s) {
    4334                 :          0 :                         c->object_size = s->object_size;
    4335                 :          0 :                         c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
    4336                 :            :                 }
    4337                 :            : 
    4338                 :          3 :                 if (sysfs_slab_alias(s, name)) {
    4339                 :          0 :                         s->refcount--;
    4340                 :            :                         s = NULL;
    4341                 :            :                 }
    4342                 :            :         }
    4343                 :            : 
    4344                 :          3 :         return s;
    4345                 :            : }
    4346                 :            : 
    4347                 :          3 : int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
    4348                 :            : {
    4349                 :            :         int err;
    4350                 :            : 
    4351                 :          3 :         err = kmem_cache_open(s, flags);
    4352                 :          3 :         if (err)
    4353                 :            :                 return err;
    4354                 :            : 
    4355                 :            :         /* Mutex is not taken during early boot */
    4356                 :          3 :         if (slab_state <= UP)
    4357                 :            :                 return 0;
    4358                 :            : 
    4359                 :          3 :         memcg_propagate_slab_attrs(s);
    4360                 :          3 :         err = sysfs_slab_add(s);
    4361                 :          3 :         if (err)
    4362                 :            :                 __kmem_cache_release(s);
    4363                 :            : 
    4364                 :          3 :         return err;
    4365                 :            : }
    4366                 :            : 
    4367                 :          3 : void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
    4368                 :            : {
    4369                 :            :         struct kmem_cache *s;
    4370                 :            :         void *ret;
    4371                 :            : 
    4372                 :          3 :         if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
    4373                 :          3 :                 return kmalloc_large(size, gfpflags);
    4374                 :            : 
    4375                 :          3 :         s = kmalloc_slab(size, gfpflags);
    4376                 :            : 
    4377                 :          3 :         if (unlikely(ZERO_OR_NULL_PTR(s)))
    4378                 :            :                 return s;
    4379                 :            : 
    4380                 :            :         ret = slab_alloc(s, gfpflags, caller);
    4381                 :            : 
    4382                 :            :         /* Honor the call site pointer we received. */
    4383                 :          3 :         trace_kmalloc(caller, ret, size, s->size, gfpflags);
    4384                 :            : 
    4385                 :          3 :         return ret;
    4386                 :            : }
    4387                 :            : 
    4388                 :            : #ifdef CONFIG_NUMA
    4389                 :            : void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
    4390                 :            :                                         int node, unsigned long caller)
    4391                 :            : {
                                                /*
                                                 * kmalloc for a given node where the call site is supplied by
                                                 * the caller: @caller is handed to slab_alloc_node() and to the
                                                 * kmalloc_node trace event.  Returns whatever the underlying
                                                 * allocator returned.
                                                 */
    4392                 :            :         struct kmem_cache *s;
    4393                 :            :         void *ret;
    4394                 :            : 
                                                /*
                                                 * Requests above KMALLOC_MAX_CACHE_SIZE bypass the kmalloc
                                                 * caches and go to kmalloc_large_node(); the trace event then
                                                 * reports PAGE_SIZE << get_order(size) as the allocated size.
                                                 */
    4395                 :            :         if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
    4396                 :            :                 ret = kmalloc_large_node(size, gfpflags, node);
    4397                 :            : 
    4398                 :            :                 trace_kmalloc_node(caller, ret,
    4399                 :            :                                    size, PAGE_SIZE << get_order(size),
    4400                 :            :                                    gfpflags, node);
    4401                 :            : 
    4402                 :            :                 return ret;
    4403                 :            :         }
    4404                 :            : 
    4405                 :            :         s = kmalloc_slab(size, gfpflags);
    4406                 :            : 
                                                /* A result flagged by ZERO_OR_NULL_PTR() is returned unchanged. */
    4407                 :            :         if (unlikely(ZERO_OR_NULL_PTR(s)))
    4408                 :            :                 return s;
    4409                 :            : 
    4410                 :            :         ret = slab_alloc_node(s, gfpflags, node, caller);
    4411                 :            : 
    4412                 :            :         /* Honor the call site pointer we received. */
    4413                 :            :         trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
    4414                 :            : 
    4415                 :            :         return ret;
    4416                 :            : }
    4417                 :            : #endif
    4418                 :            : 
    4419                 :            : #ifdef CONFIG_SYSFS
    4420                 :          0 : static int count_inuse(struct page *page)
    4421                 :            : {
                                                /* Accessor returning the inuse field of @page. */
    4422                 :          0 :         return page->inuse;
    4423                 :            : }
    4424                 :            : 
    4425                 :          0 : static int count_total(struct page *page)
    4426                 :            : {
                                                /* Accessor returning the objects field of @page. */
    4427                 :          0 :         return page->objects;
    4428                 :            : }
    4429                 :            : #endif
    4430                 :            : 
    4431                 :            : #ifdef CONFIG_SLUB_DEBUG
    4432                 :          0 : static int validate_slab(struct kmem_cache *s, struct page *page,
    4433                 :            :                                                 unsigned long *map)
    4434                 :            : {
                                                /*
                                                 * Run the consistency checks on one slab page.  @map is
                                                 * caller-provided scratch space that is cleared and refilled
                                                 * here for page->objects bits.  Returns 1 when every check
                                                 * passes and 0 at the first one that fails.
                                                 */
    4435                 :            :         void *p;
    4436                 :            :         void *addr = page_address(page);
    4437                 :            : 
    4438                 :          0 :         if (!check_slab(s, page) ||
    4439                 :          0 :                         !on_freelist(s, page, NULL))
    4440                 :            :                 return 0;
    4441                 :            : 
    4442                 :            :         /* Now we know that a valid freelist exists */
    4443                 :          0 :         bitmap_zero(map, page->objects);
    4444                 :            : 
                                                /*
                                                 * NOTE(review): get_map() presumably sets one bit per object
                                                 * that is on the freelist - confirm against its definition.
                                                 */
    4445                 :          0 :         get_map(s, page, map);
                                                /* Objects whose bit is set are checked as SLUB_RED_INACTIVE. */
    4446                 :          0 :         for_each_object(p, s, addr, page->objects) {
    4447                 :          0 :                 if (test_bit(slab_index(p, s, addr), map))
    4448                 :          0 :                         if (!check_object(s, page, p, SLUB_RED_INACTIVE))
    4449                 :            :                                 return 0;
    4450                 :            :         }
    4451                 :            : 
                                                /* All remaining objects are checked as SLUB_RED_ACTIVE. */
    4452                 :          0 :         for_each_object(p, s, addr, page->objects)
    4453                 :          0 :                 if (!test_bit(slab_index(p, s, addr), map))
    4454                 :          0 :                         if (!check_object(s, page, p, SLUB_RED_ACTIVE))
    4455                 :            :                                 return 0;
    4456                 :            :         return 1;
    4457                 :            : }
    4458                 :            : 
    4459                 :          0 : static void validate_slab_slab(struct kmem_cache *s, struct page *page,
    4460                 :            :                                                 unsigned long *map)
    4461                 :            : {
                                                /*
                                                 * Validate @page while holding its slab lock.  The return value
                                                 * of validate_slab() is discarded here.
                                                 */
    4462                 :            :         slab_lock(page);
    4463                 :          0 :         validate_slab(s, page, map);
    4464                 :            :         slab_unlock(page);
    4465                 :          0 : }
    4466                 :            : 
    4467                 :          0 : static int validate_slab_node(struct kmem_cache *s,
    4468                 :            :                 struct kmem_cache_node *n, unsigned long *map)
    4469                 :            : {
                                                /*
                                                 * Validate the slabs queued on node @n and cross-check the list
                                                 * lengths against the node's counters, logging any mismatch
                                                 * with pr_err().  The whole walk runs under n->list_lock taken
                                                 * with spin_lock_irqsave().  Returns the number of slabs
                                                 * visited.
                                                 */
    4470                 :            :         unsigned long count = 0;
    4471                 :            :         struct page *page;
    4472                 :            :         unsigned long flags;
    4473                 :            : 
    4474                 :          0 :         spin_lock_irqsave(&n->list_lock, flags);
    4475                 :            : 
    4476                 :          0 :         list_for_each_entry(page, &n->partial, slab_list) {
    4477                 :          0 :                 validate_slab_slab(s, page, map);
    4478                 :          0 :                 count++;
    4479                 :            :         }
    4480                 :          0 :         if (count != n->nr_partial)
    4481                 :          0 :                 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
    4482                 :            :                        s->name, count, n->nr_partial);
    4483                 :            : 
                                                /* The full list is only walked for SLAB_STORE_USER caches. */
    4484                 :          0 :         if (!(s->flags & SLAB_STORE_USER))
    4485                 :            :                 goto out;
    4486                 :            : 
                                                /*
                                                 * count is not reset: after this loop it holds partial plus
                                                 * full slabs, which is what gets compared with nr_slabs.
                                                 */
    4487                 :          0 :         list_for_each_entry(page, &n->full, slab_list) {
    4488                 :          0 :                 validate_slab_slab(s, page, map);
    4489                 :          0 :                 count++;
    4490                 :            :         }
    4491                 :          0 :         if (count != atomic_long_read(&n->nr_slabs))
    4492                 :          0 :                 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
    4493                 :            :                        s->name, count, atomic_long_read(&n->nr_slabs));
    4494                 :            : 
    4495                 :            : out:
    4496                 :            :         spin_unlock_irqrestore(&n->list_lock, flags);
    4497                 :          0 :         return count;
    4498                 :            : }
    4499                 :            : 
    4500                 :          0 : static long validate_slab_cache(struct kmem_cache *s)
    4501                 :            : {
                                                /*
                                                 * Validate every node of cache @s.  A scratch bitmap of
                                                 * oo_objects(s->max) bits is allocated once and shared by all
                                                 * per-slab checks.  Returns the sum of the per-node slab counts
                                                 * from validate_slab_node(), or -ENOMEM if the bitmap cannot be
                                                 * allocated.
                                                 */
    4502                 :            :         int node;
    4503                 :            :         unsigned long count = 0;
    4504                 :            :         struct kmem_cache_node *n;
    4505                 :          0 :         unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
    4506                 :            : 
    4507                 :          0 :         if (!map)
    4508                 :            :                 return -ENOMEM;
    4509                 :            : 
                                                /* flush_all() is called before the per-node lists are walked. */
    4510                 :            :         flush_all(s);
    4511                 :          0 :         for_each_kmem_cache_node(s, node, n)
    4512                 :          0 :                 count += validate_slab_node(s, n, map);
    4513                 :          0 :         bitmap_free(map);
    4514                 :          0 :         return count;
    4515                 :            : }
    4516                 :            : /*
    4517                 :            :  * Generate lists of code addresses where slabcache objects are allocated
    4518                 :            :  * and freed.
    4519                 :            :  */
    4520                 :            : 
    4521                 :            : struct location {
                                                /*
                                                 * Aggregated statistics for one code address, filled in by
                                                 * add_location() from the track records that share that addr.
                                                 */
    4522                 :            :         unsigned long count;    /* track records folded into this entry */
    4523                 :            :         unsigned long addr;     /* track->addr this entry stands for */
    4524                 :            :         long long sum_time;     /* sum of ages (jiffies - track->when) */
    4525                 :            :         long min_time;          /* smallest age recorded */
    4526                 :            :         long max_time;          /* largest age recorded */
    4527                 :            :         long min_pid;           /* lowest track->pid recorded */
    4528                 :            :         long max_pid;           /* highest track->pid recorded */
    4529                 :            :         DECLARE_BITMAP(cpus, NR_CPUS);  /* track->cpu values recorded */
    4530                 :            :         nodemask_t nodes;       /* nodes of the pages holding the track records */
    4531                 :            : };
    4532                 :            : 
    4533                 :            : struct loc_track {
                                                /* Growable array of struct location, managed by alloc_loc_track(). */
    4534                 :            :         unsigned long max;      /* entries the loc array has room for */
    4535                 :            :         unsigned long count;    /* entries currently in use */
    4536                 :            :         struct location *loc;   /* entries, kept in ascending addr order by add_location() */
    4537                 :            : };
    4538                 :            : 
    4539                 :          0 : static void free_loc_track(struct loc_track *t)
    4540                 :            : {
                                                /*
                                                 * Release the pages behind t->loc.  Nothing is freed while
                                                 * t->max is 0.  The page order is recomputed from t->max with
                                                 * the same expression alloc_loc_track() uses when allocating.
                                                 * The fields of @t are left as they were.
                                                 */
    4541                 :          0 :         if (t->max)
    4542                 :          0 :                 free_pages((unsigned long)t->loc,
    4543                 :          0 :                         get_order(sizeof(struct location) * t->max));
    4544                 :          0 : }
    4545                 :            : 
    4546                 :          0 : static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
    4547                 :            : {
                                                /*
                                                 * Give @t a freshly allocated array with room for @max entries.
                                                 * When @t already holds entries they are copied into the new
                                                 * array and the old one is freed.  Returns 1 on success and 0
                                                 * if the pages could not be allocated, in which case @t is left
                                                 * untouched.  t->count is never modified here.
                                                 *
                                                 * NOTE(review): nothing checks that @max >= t->count; the
                                                 * callers in this file pass either an initial size while count
                                                 * is 0, or 2 * t->max.
                                                 */
    4548                 :            :         struct location *l;
    4549                 :            :         int order;
    4550                 :            : 
    4551                 :          0 :         order = get_order(sizeof(struct location) * max);
    4552                 :            : 
    4553                 :          0 :         l = (void *)__get_free_pages(flags, order);
    4554                 :          0 :         if (!l)
    4555                 :            :                 return 0;
    4556                 :            : 
    4557                 :          0 :         if (t->count) {
    4558                 :          0 :                 memcpy(l, t->loc, sizeof(struct location) * t->count);
                                                        /* Must run before t->max is overwritten: it sizes the free. */
    4559                 :          0 :                 free_loc_track(t);
    4560                 :            :         }
    4561                 :          0 :         t->max = max;
    4562                 :          0 :         t->loc = l;
    4563                 :          0 :         return 1;
    4564                 :            : }
    4565                 :            : 
    4566                 :          0 : static int add_location(struct loc_track *t, struct kmem_cache *s,
    4567                 :            :                                 const struct track *track)
    4568                 :            : {
                                                /*
                                                 * Account one track record in @t.  Entries are kept in
                                                 * ascending addr order and located by binary search: a record
                                                 * whose addr is already present updates that entry, otherwise a
                                                 * new entry is inserted at the sorted position.  Returns 1 on
                                                 * success and 0 when the array had to grow and the allocation
                                                 * failed.  @s is not used in this body.
                                                 */
    4569                 :            :         long start, end, pos;
    4570                 :            :         struct location *l;
    4571                 :            :         unsigned long caddr;
    4572                 :          0 :         unsigned long age = jiffies - track->when;
    4573                 :            : 
                                                /* Candidates lie strictly between start and end. */
    4574                 :            :         start = -1;
    4575                 :          0 :         end = t->count;
    4576                 :            : 
    4577                 :            :         for ( ; ; ) {
                                                        /* Midpoint rounded up: equals end once no candidate is left. */
    4578                 :          0 :                 pos = start + (end - start + 1) / 2;
    4579                 :            : 
    4580                 :            :                 /*
    4581                 :            :                  * There is nothing at "end". If we end up there
    4582                 :            :                  * we need to insert the new entry before end.
    4583                 :            :                  */
    4584                 :          0 :                 if (pos == end)
    4585                 :            :                         break;
    4586                 :            : 
    4587                 :          0 :                 caddr = t->loc[pos].addr;
    4588                 :          0 :                 if (track->addr == caddr) {
    4589                 :            : 
    4590                 :          0 :                         l = &t->loc[pos];
    4591                 :          0 :                         l->count++;
                                                                /*
                                                                 * A zero track->when leaves the time, pid and
                                                                 * cpu statistics of the entry untouched.
                                                                 */
    4592                 :          0 :                         if (track->when) {
    4593                 :          0 :                                 l->sum_time += age;
    4594                 :          0 :                                 if (age < l->min_time)
    4595                 :          0 :                                         l->min_time = age;
    4596                 :          0 :                                 if (age > l->max_time)
    4597                 :          0 :                                         l->max_time = age;
    4598                 :            : 
    4599                 :          0 :                                 if (track->pid < l->min_pid)
    4600                 :          0 :                                         l->min_pid = track->pid;
    4601                 :          0 :                                 if (track->pid > l->max_pid)
    4602                 :          0 :                                         l->max_pid = track->pid;
    4603                 :            : 
    4604                 :          0 :                                 cpumask_set_cpu(track->cpu,
    4605                 :            :                                                 to_cpumask(l->cpus));
    4606                 :            :                         }
                                                                /* The node recorded is that of the page holding the track. */
    4607                 :            :                         node_set(page_to_nid(virt_to_page(track)), l->nodes);
    4608                 :          0 :                         return 1;
    4609                 :            :                 }
    4610                 :            : 
    4611                 :          0 :                 if (track->addr < caddr)
    4612                 :            :                         end = pos;
    4613                 :            :                 else
    4614                 :            :                         start = pos;
    4615                 :            :         }
    4616                 :            : 
    4617                 :            :         /*
    4618                 :            :          * Not found. Insert new tracking element.
    4619                 :            :          */
                                                /*
                                                 * Double the array when it is full.  GFP_ATOMIC is used here;
                                                 * list_locations() reaches this function through process_slab()
                                                 * while holding n->list_lock taken with spin_lock_irqsave().
                                                 */
    4620                 :          0 :         if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
    4621                 :            :                 return 0;
    4622                 :            : 
                                                /* Shift the tail up by one slot to open a gap at pos. */
    4623                 :          0 :         l = t->loc + pos;
    4624                 :          0 :         if (pos < t->count)
    4625                 :          0 :                 memmove(l + 1, l,
    4626                 :          0 :                         (t->count - pos) * sizeof(struct location));
    4627                 :          0 :         t->count++;
                                                /* A new entry starts with this one record as its only sample. */
    4628                 :          0 :         l->count = 1;
    4629                 :          0 :         l->addr = track->addr;
    4630                 :          0 :         l->sum_time = age;
    4631                 :          0 :         l->min_time = age;
    4632                 :          0 :         l->max_time = age;
    4633                 :          0 :         l->min_pid = track->pid;
    4634                 :          0 :         l->max_pid = track->pid;
    4635                 :            :         cpumask_clear(to_cpumask(l->cpus));
    4636                 :          0 :         cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
    4637                 :            :         nodes_clear(l->nodes);
    4638                 :            :         node_set(page_to_nid(virt_to_page(track)), l->nodes);
    4639                 :          0 :         return 1;
    4640                 :            : }
    4641                 :            : 
    4642                 :          0 : static void process_slab(struct loc_track *t, struct kmem_cache *s,
    4643                 :            :                 struct page *page, enum track_item alloc,
    4644                 :            :                 unsigned long *map)
    4645                 :            : {
                                                /*
                                                 * Feed into @t the track selected by @alloc for every object of
                                                 * @page whose bit is clear in the map filled in by get_map().
                                                 * @map is scratch space that is cleared and refilled here.  The
                                                 * return value of add_location() is ignored.
                                                 */
    4646                 :            :         void *addr = page_address(page);
    4647                 :            :         void *p;
    4648                 :            : 
    4649                 :          0 :         bitmap_zero(map, page->objects);
    4650                 :          0 :         get_map(s, page, map);
    4651                 :            : 
    4652                 :          0 :         for_each_object(p, s, addr, page->objects)
    4653                 :          0 :                 if (!test_bit(slab_index(p, s, addr), map))
    4654                 :          0 :                         add_location(t, s, get_track(s, p, alloc));
    4655                 :          0 : }
    4656                 :            : 
    4657                 :          0 : static int list_locations(struct kmem_cache *s, char *buf,
    4658                 :            :                                         enum track_item alloc)
    4659                 :            : {
                                                /*
                                                 * Write into @buf one text line per distinct track address
                                                 * found in the slabs on the partial and full lists of every
                                                 * node of @s, using the track selected by @alloc.  Returns the
                                                 * number of bytes written.  On allocation failure the text
                                                 * "Out of memory\n" is written and its length returned.
                                                 *
                                                 * NOTE(review): all bounds below are relative to PAGE_SIZE, so
                                                 * @buf is presumably a page-sized buffer - confirm with callers.
                                                 */
    4660                 :            :         int len = 0;
    4661                 :            :         unsigned long i;
    4662                 :          0 :         struct loc_track t = { 0, 0, NULL };
    4663                 :            :         int node;
    4664                 :            :         struct kmem_cache_node *n;
    4665                 :          0 :         unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
    4666                 :            : 
                                                /* The initial location array is sized to fill one page. */
    4667                 :          0 :         if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
    4668                 :            :                                      GFP_KERNEL)) {
    4669                 :          0 :                 bitmap_free(map);
    4670                 :          0 :                 return sprintf(buf, "Out of memory\n");
    4671                 :            :         }
    4672                 :            :         /* Push back cpu slabs */
    4673                 :            :         flush_all(s);
    4674                 :            : 
    4675                 :          0 :         for_each_kmem_cache_node(s, node, n) {
    4676                 :            :                 unsigned long flags;
    4677                 :            :                 struct page *page;
    4678                 :            : 
                                                        /* Nodes whose nr_slabs counter reads 0 are skipped. */
    4679                 :          0 :                 if (!atomic_long_read(&n->nr_slabs))
    4680                 :          0 :                         continue;
    4681                 :            : 
    4682                 :          0 :                 spin_lock_irqsave(&n->list_lock, flags);
    4683                 :          0 :                 list_for_each_entry(page, &n->partial, slab_list)
    4684                 :          0 :                         process_slab(&t, s, page, alloc, map);
    4685                 :          0 :                 list_for_each_entry(page, &n->full, slab_list)
    4686                 :          0 :                         process_slab(&t, s, page, alloc, map);
    4687                 :            :                 spin_unlock_irqrestore(&n->list_lock, flags);
    4688                 :            :         }
    4689                 :            : 
    4690                 :          0 :         for (i = 0; i < t.count; i++) {
    4691                 :          0 :                 struct location *l = &t.loc[i];
    4692                 :            : 
                                                        /*
                                                         * Stop once fewer than KSYM_SYMBOL_LEN + 100 bytes of
                                                         * the page remain; later entries are silently dropped.
                                                         */
    4693                 :          0 :                 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
    4694                 :            :                         break;
    4695                 :          0 :                 len += sprintf(buf + len, "%7ld ", l->count);
    4696                 :            : 
    4697                 :          0 :                 if (l->addr)
    4698                 :          0 :                         len += sprintf(buf + len, "%pS", (void *)l->addr);
    4699                 :            :                 else
    4700                 :          0 :                         len += sprintf(buf + len, "<not-available>");
    4701                 :            : 
                                                        /*
                                                         * age is printed as min/average/max, or as one value
                                                         * when sum_time equals min_time (e.g. a single sample).
                                                         */
    4702                 :          0 :                 if (l->sum_time != l->min_time) {
    4703                 :          0 :                         len += sprintf(buf + len, " age=%ld/%ld/%ld",
    4704                 :            :                                 l->min_time,
    4705                 :          0 :                                 (long)div_u64(l->sum_time, l->count),
    4706                 :            :                                 l->max_time);
    4707                 :            :                 } else
    4708                 :          0 :                         len += sprintf(buf + len, " age=%ld",
    4709                 :            :                                 l->min_time);
    4710                 :            : 
                                                        /* pid is printed as a range only when min and max differ. */
    4711                 :          0 :                 if (l->min_pid != l->max_pid)
    4712                 :          0 :                         len += sprintf(buf + len, " pid=%ld-%ld",
    4713                 :            :                                 l->min_pid, l->max_pid);
    4714                 :            :                 else
    4715                 :          0 :                         len += sprintf(buf + len, " pid=%ld",
    4716                 :            :                                 l->min_pid);
    4717                 :            : 
                                                        /*
                                                         * The cpu and node lists are bounded with scnprintf()
                                                         * so that 50 bytes of the page stay in reserve.
                                                         */
    4718                 :          0 :                 if (num_online_cpus() > 1 &&
    4719                 :          0 :                                 !cpumask_empty(to_cpumask(l->cpus)) &&
    4720                 :          0 :                                 len < PAGE_SIZE - 60)
    4721                 :          0 :                         len += scnprintf(buf + len, PAGE_SIZE - len - 50,
    4722                 :            :                                          " cpus=%*pbl",
    4723                 :            :                                          cpumask_pr_args(to_cpumask(l->cpus)));
    4724                 :            : 
    4725                 :            :                 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
    4726                 :            :                                 len < PAGE_SIZE - 60)
    4727                 :            :                         len += scnprintf(buf + len, PAGE_SIZE - len - 50,
    4728                 :            :                                          " nodes=%*pbl",
    4729                 :            :                                          nodemask_pr_args(&l->nodes));
    4730                 :            : 
    4731                 :          0 :                 len += sprintf(buf + len, "\n");
    4732                 :            :         }
    4733                 :            : 
    4734                 :          0 :         free_loc_track(&t);
    4735                 :          0 :         bitmap_free(map);
                                                /*
                                                 * t.count lives in the local struct and is still valid after
                                                 * free_loc_track().  With no entries the loop above wrote
                                                 * nothing, so len is 0 and the text lands at the start of buf.
                                                 */
    4736                 :          0 :         if (!t.count)
    4737                 :          0 :                 len += sprintf(buf, "No data\n");
    4738                 :          0 :         return len;
    4739                 :            : }
    4740                 :            : #endif  /* CONFIG_SLUB_DEBUG */
    4741                 :            : 
    4742                 :            : #ifdef SLUB_RESILIENCY_TEST
    4743                 :            : static void __init resiliency_test(void)
    4744                 :            : {
                                                /*
                                                 * Deliberately corrupt kmalloc objects - by writing past the
                                                 * end of live objects and by writing into objects after
                                                 * kfree() - and run validate_slab_cache() after each case.  The
                                                 * cache index used each time (4..9) is log2 of the allocation
                                                 * size (16..512).
                                                 *
                                                 * NOTE(review): the kzalloc() results are dereferenced without
                                                 * a NULL check.
                                                 */
    4745                 :            :         u8 *p;
    4746                 :            :         int type = KMALLOC_NORMAL;
    4747                 :            : 
    4748                 :            :         BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
    4749                 :            : 
    4750                 :            :         pr_err("SLUB resiliency testing\n");
    4751                 :            :         pr_err("-----------------------\n");
    4752                 :            :         pr_err("A. Corruption after allocation\n");
    4753                 :            : 
                                                /* A.1: write the first byte past a 16-byte object. */
    4754                 :            :         p = kzalloc(16, GFP_KERNEL);
    4755                 :            :         p[16] = 0x12;
    4756                 :            :         pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
    4757                 :            :                p + 16);
    4758                 :            : 
    4759                 :            :         validate_slab_cache(kmalloc_caches[type][4]);
    4760                 :            : 
    4761                 :            :         /* Hmmm... The next two are dangerous */
                                                /* A.2: write sizeof(void *) bytes past the end of a 32-byte object. */
    4762                 :            :         p = kzalloc(32, GFP_KERNEL);
    4763                 :            :         p[32 + sizeof(void *)] = 0x34;
    4764                 :            :         pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
    4765                 :            :                p);
    4766                 :            :         pr_err("If allocated object is overwritten then not detectable\n\n");
    4767                 :            : 
    4768                 :            :         validate_slab_cache(kmalloc_caches[type][5]);
                                                /*
                                                 * A.3: write one byte at a pointer-size multiple past the end
                                                 * of a 64-byte object; the multiple is the low 8 bits of
                                                 * get_cycles().
                                                 */
    4769                 :            :         p = kzalloc(64, GFP_KERNEL);
    4770                 :            :         p += 64 + (get_cycles() & 0xff) * sizeof(void *);
    4771                 :            :         *p = 0x56;
    4772                 :            :         pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
    4773                 :            :                p);
    4774                 :            :         pr_err("If allocated object is overwritten then not detectable\n\n");
    4775                 :            :         validate_slab_cache(kmalloc_caches[type][6]);
    4776                 :            : 
                                                /* B.1: write the first byte of a freed 128-byte object. */
    4777                 :            :         pr_err("\nB. Corruption after free\n");
    4778                 :            :         p = kzalloc(128, GFP_KERNEL);
    4779                 :            :         kfree(p);
    4780                 :            :         *p = 0x78;
    4781                 :            :         pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
    4782                 :            :         validate_slab_cache(kmalloc_caches[type][7]);
    4783                 :            : 
                                                /* B.2: write byte index 50 of a freed 256-byte object. */
    4784                 :            :         p = kzalloc(256, GFP_KERNEL);
    4785                 :            :         kfree(p);
    4786                 :            :         p[50] = 0x9a;
    4787                 :            :         pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
    4788                 :            :         validate_slab_cache(kmalloc_caches[type][8]);
    4789                 :            : 
                                                /* B.3: write the first byte past a freed 512-byte object. */
    4790                 :            :         p = kzalloc(512, GFP_KERNEL);
    4791                 :            :         kfree(p);
    4792                 :            :         p[512] = 0xab;
    4793                 :            :         pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
    4794                 :            :         validate_slab_cache(kmalloc_caches[type][9]);
    4795                 :            : }
    4796                 :            : #else
    4797                 :            : #ifdef CONFIG_SYSFS
    4798                 :            : static void resiliency_test(void) {};  /* empty stub: SLUB_RESILIENCY_TEST is not defined but CONFIG_SYSFS is */
    4799                 :            : #endif
    4800                 :            : #endif  /* SLUB_RESILIENCY_TEST */
    4801                 :            : 
    4802                 :            : #ifdef CONFIG_SYSFS
    4803                 :            : enum slab_stat_type {
                                                /* Each value is the bit position of the matching SO_* mask below. */
    4804                 :            :         SL_ALL,                 /* All slabs */
    4805                 :            :         SL_PARTIAL,             /* Only partially allocated slabs */
    4806                 :            :         SL_CPU,                 /* Only slabs used for cpu caches */
    4807                 :            :         SL_OBJECTS,             /* Determine allocated objects not slabs */
    4808                 :            :         SL_TOTAL                /* Determine object capacity not slabs */
    4809                 :            : };
    4810                 :            : 
                                        /* Single-bit masks built from the enum values above, so they can be OR-ed. */
    4811                 :            : #define SO_ALL          (1 << SL_ALL)
    4812                 :            : #define SO_PARTIAL      (1 << SL_PARTIAL)
    4813                 :            : #define SO_CPU          (1 << SL_CPU)
    4814                 :            : #define SO_OBJECTS      (1 << SL_OBJECTS)
    4815                 :            : #define SO_TOTAL        (1 << SL_TOTAL)
    4816                 :            : 
    4817                 :            : #ifdef CONFIG_MEMCG
    4818                 :            : static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);  /* default comes from Kconfig */
    4819                 :            : 
    4820                 :          0 : static int __init setup_slub_memcg_sysfs(char *str)
    4821                 :            : {
                                                /*
                                                 * Handler for the "slub_memcg_sysfs=" boot parameter.  When
                                                 * get_option() reports a positive result the parsed integer is
                                                 * stored in memcg_sysfs_enabled; otherwise the default is
                                                 * kept.  Always returns 1.
                                                 */
    4822                 :            :         int v;
    4823                 :            : 
    4824                 :          0 :         if (get_option(&str, &v) > 0)
    4825                 :          0 :                 memcg_sysfs_enabled = v;
    4826                 :            : 
    4827                 :          0 :         return 1;
    4828                 :            : }
    4829                 :            : 
    4830                 :            : __setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
    4831                 :            : #endif
    4832                 :            : 
    4833                 :          0 : static ssize_t show_slab_objects(struct kmem_cache *s,
    4834                 :            :                             char *buf, unsigned long flags)
    4835                 :            : {
    4836                 :            :         unsigned long total = 0;
    4837                 :            :         int node;
    4838                 :            :         int x;
    4839                 :            :         unsigned long *nodes;
    4840                 :            : 
    4841                 :            :         nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
    4842                 :          0 :         if (!nodes)
    4843                 :            :                 return -ENOMEM;
    4844                 :            : 
    4845                 :          0 :         if (flags & SO_CPU) {
    4846                 :            :                 int cpu;
    4847                 :            : 
    4848                 :          0 :                 for_each_possible_cpu(cpu) {
    4849                 :          0 :                         struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
    4850                 :            :                                                                cpu);
    4851                 :            :                         int node;
    4852                 :            :                         struct page *page;
    4853                 :            : 
    4854                 :          0 :                         page = READ_ONCE(c->page);
    4855                 :          0 :                         if (!page)
    4856                 :          0 :                                 continue;
    4857                 :            : 
    4858                 :            :                         node = page_to_nid(page);
    4859                 :          0 :                         if (flags & SO_TOTAL)
    4860                 :          0 :                                 x = page->objects;
    4861                 :          0 :                         else if (flags & SO_OBJECTS)
    4862                 :          0 :                                 x = page->inuse;
    4863                 :            :                         else
    4864                 :            :                                 x = 1;
    4865                 :            : 
    4866                 :          0 :                         total += x;
    4867                 :          0 :                         nodes[node] += x;
    4868                 :            : 
    4869                 :          0 :                         page = slub_percpu_partial_read_once(c);
    4870                 :          0 :                         if (page) {
    4871                 :            :                                 node = page_to_nid(page);
    4872                 :          0 :                                 if (flags & SO_TOTAL)
    4873                 :          0 :                                         WARN_ON_ONCE(1);
    4874                 :          0 :                                 else if (flags & SO_OBJECTS)
    4875                 :          0 :                                         WARN_ON_ONCE(1);
    4876                 :            :                                 else
    4877                 :          0 :                                         x = page->pages;
    4878                 :          0 :                                 total += x;
    4879                 :          0 :                                 nodes[node] += x;
    4880                 :            :                         }
    4881                 :            :                 }
    4882                 :            :         }
    4883                 :            : 
    4884                 :            :         /*
    4885                 :            :          * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
    4886                 :            :          * already held which will conflict with an existing lock order:
    4887                 :            :          *
    4888                 :            :          * mem_hotplug_lock->slab_mutex->kernfs_mutex
    4889                 :            :          *
    4890                 :            :          * We don't really need mem_hotplug_lock (to hold off
    4891                 :            :          * slab_mem_going_offline_callback) here because slab's memory hot
    4892                 :            :          * unplug code doesn't destroy the kmem_cache->node[] data.
    4893                 :            :          */
    4894                 :            : 
    4895                 :            : #ifdef CONFIG_SLUB_DEBUG
    4896                 :          0 :         if (flags & SO_ALL) {
    4897                 :            :                 struct kmem_cache_node *n;
    4898                 :            : 
    4899                 :          0 :                 for_each_kmem_cache_node(s, node, n) {
    4900                 :            : 
    4901                 :          0 :                         if (flags & SO_TOTAL)
    4902                 :            :                                 x = atomic_long_read(&n->total_objects);
    4903                 :          0 :                         else if (flags & SO_OBJECTS)
    4904                 :          0 :                                 x = atomic_long_read(&n->total_objects) -
    4905                 :          0 :                                         count_partial(n, count_free);
    4906                 :            :                         else
    4907                 :            :                                 x = atomic_long_read(&n->nr_slabs);
    4908                 :          0 :                         total += x;
    4909                 :          0 :                         nodes[node] += x;
    4910                 :            :                 }
    4911                 :            : 
    4912                 :            :         } else
    4913                 :            : #endif
    4914                 :          0 :         if (flags & SO_PARTIAL) {
    4915                 :            :                 struct kmem_cache_node *n;
    4916                 :            : 
    4917                 :          0 :                 for_each_kmem_cache_node(s, node, n) {
    4918                 :          0 :                         if (flags & SO_TOTAL)
    4919                 :          0 :                                 x = count_partial(n, count_total);
    4920                 :          0 :                         else if (flags & SO_OBJECTS)
    4921                 :          0 :                                 x = count_partial(n, count_inuse);
    4922                 :            :                         else
    4923                 :          0 :                                 x = n->nr_partial;
    4924                 :          0 :                         total += x;
    4925                 :          0 :                         nodes[node] += x;
    4926                 :            :                 }
    4927                 :            :         }
    4928                 :          0 :         x = sprintf(buf, "%lu", total);
    4929                 :            : #ifdef CONFIG_NUMA
    4930                 :            :         for (node = 0; node < nr_node_ids; node++)
    4931                 :            :                 if (nodes[node])
    4932                 :            :                         x += sprintf(buf + x, " N%d=%lu",
    4933                 :            :                                         node, nodes[node]);
    4934                 :            : #endif
    4935                 :          0 :         kfree(nodes);
    4936                 :          0 :         return x + sprintf(buf + x, "\n");
    4937                 :            : }
    4938                 :            : 
    4939                 :            : #ifdef CONFIG_SLUB_DEBUG
    4940                 :            : static int any_slab_objects(struct kmem_cache *s)
    4941                 :            : {
    4942                 :            :         int node;
    4943                 :            :         struct kmem_cache_node *n;
    4944                 :            : 
    4945                 :          0 :         for_each_kmem_cache_node(s, node, n)
    4946                 :          0 :                 if (atomic_long_read(&n->total_objects))
    4947                 :            :                         return 1;
    4948                 :            : 
    4949                 :            :         return 0;
    4950                 :            : }
    4951                 :            : #endif
    4952                 :            : 
    4953                 :            : #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
    4954                 :            : #define to_slab(n) container_of(n, struct kmem_cache, kobj)
    4955                 :            : 
    4956                 :            : struct slab_attribute {
    4957                 :            :         struct attribute attr;
    4958                 :            :         ssize_t (*show)(struct kmem_cache *s, char *buf);
    4959                 :            :         ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
    4960                 :            : };
    4961                 :            : 
    4962                 :            : #define SLAB_ATTR_RO(_name) \
    4963                 :            :         static struct slab_attribute _name##_attr = \
    4964                 :            :         __ATTR(_name, 0400, _name##_show, NULL)
    4965                 :            : 
    4966                 :            : #define SLAB_ATTR(_name) \
    4967                 :            :         static struct slab_attribute _name##_attr =  \
    4968                 :            :         __ATTR(_name, 0600, _name##_show, _name##_store)
    4969                 :            : 
    4970                 :          0 : static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
    4971                 :            : {
    4972                 :          0 :         return sprintf(buf, "%u\n", s->size);
    4973                 :            : }
    4974                 :            : SLAB_ATTR_RO(slab_size);
    4975                 :            : 
    4976                 :          0 : static ssize_t align_show(struct kmem_cache *s, char *buf)
    4977                 :            : {
    4978                 :          0 :         return sprintf(buf, "%u\n", s->align);
    4979                 :            : }
    4980                 :            : SLAB_ATTR_RO(align);
    4981                 :            : 
    4982                 :          0 : static ssize_t object_size_show(struct kmem_cache *s, char *buf)
    4983                 :            : {
    4984                 :          0 :         return sprintf(buf, "%u\n", s->object_size);
    4985                 :            : }
    4986                 :            : SLAB_ATTR_RO(object_size);
    4987                 :            : 
    4988                 :          0 : static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
    4989                 :            : {
    4990                 :          0 :         return sprintf(buf, "%u\n", oo_objects(s->oo));
    4991                 :            : }
    4992                 :            : SLAB_ATTR_RO(objs_per_slab);
    4993                 :            : 
    4994                 :          0 : static ssize_t order_store(struct kmem_cache *s,
    4995                 :            :                                 const char *buf, size_t length)
    4996                 :            : {
    4997                 :            :         unsigned int order;
    4998                 :            :         int err;
    4999                 :            : 
    5000                 :          0 :         err = kstrtouint(buf, 10, &order);
    5001                 :          0 :         if (err)
    5002                 :            :                 return err;
    5003                 :            : 
    5004                 :          0 :         if (order > slub_max_order || order < slub_min_order)
    5005                 :            :                 return -EINVAL;
    5006                 :            : 
    5007                 :          0 :         calculate_sizes(s, order);
    5008                 :          0 :         return length;
    5009                 :            : }
    5010                 :            : 
    5011                 :          0 : static ssize_t order_show(struct kmem_cache *s, char *buf)
    5012                 :            : {
    5013                 :          0 :         return sprintf(buf, "%u\n", oo_order(s->oo));
    5014                 :            : }
    5015                 :            : SLAB_ATTR(order);
    5016                 :            : 
    5017                 :          0 : static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
    5018                 :            : {
    5019                 :          0 :         return sprintf(buf, "%lu\n", s->min_partial);
    5020                 :            : }
    5021                 :            : 
    5022                 :          0 : static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
    5023                 :            :                                  size_t length)
    5024                 :            : {
    5025                 :            :         unsigned long min;
    5026                 :            :         int err;
    5027                 :            : 
    5028                 :            :         err = kstrtoul(buf, 10, &min);
    5029                 :          0 :         if (err)
    5030                 :            :                 return err;
    5031                 :            : 
    5032                 :          0 :         set_min_partial(s, min);
    5033                 :          0 :         return length;
    5034                 :            : }
    5035                 :            : SLAB_ATTR(min_partial);
    5036                 :            : 
    5037                 :          0 : static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
    5038                 :            : {
    5039                 :          0 :         return sprintf(buf, "%u\n", slub_cpu_partial(s));
    5040                 :            : }
    5041                 :            : 
    5042                 :          0 : static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
    5043                 :            :                                  size_t length)
    5044                 :            : {
    5045                 :            :         unsigned int objects;
    5046                 :            :         int err;
    5047                 :            : 
    5048                 :          0 :         err = kstrtouint(buf, 10, &objects);
    5049                 :          0 :         if (err)
    5050                 :            :                 return err;
    5051                 :          0 :         if (objects && !kmem_cache_has_cpu_partial(s))
    5052                 :            :                 return -EINVAL;
    5053                 :            : 
    5054                 :          0 :         slub_set_cpu_partial(s, objects);
    5055                 :            :         flush_all(s);
    5056                 :          0 :         return length;
    5057                 :            : }
    5058                 :            : SLAB_ATTR(cpu_partial);
    5059                 :            : 
    5060                 :          0 : static ssize_t ctor_show(struct kmem_cache *s, char *buf)
    5061                 :            : {
    5062                 :          0 :         if (!s->ctor)
    5063                 :            :                 return 0;
    5064                 :          0 :         return sprintf(buf, "%pS\n", s->ctor);
    5065                 :            : }
    5066                 :            : SLAB_ATTR_RO(ctor);
    5067                 :            : 
    5068                 :          0 : static ssize_t aliases_show(struct kmem_cache *s, char *buf)
    5069                 :            : {
    5070                 :          0 :         return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
    5071                 :            : }
    5072                 :            : SLAB_ATTR_RO(aliases);
    5073                 :            : 
    5074                 :          0 : static ssize_t partial_show(struct kmem_cache *s, char *buf)
    5075                 :            : {
    5076                 :          0 :         return show_slab_objects(s, buf, SO_PARTIAL);
    5077                 :            : }
    5078                 :            : SLAB_ATTR_RO(partial);
    5079                 :            : 
    5080                 :          0 : static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
    5081                 :            : {
    5082                 :          0 :         return show_slab_objects(s, buf, SO_CPU);
    5083                 :            : }
    5084                 :            : SLAB_ATTR_RO(cpu_slabs);
    5085                 :            : 
    5086                 :          0 : static ssize_t objects_show(struct kmem_cache *s, char *buf)
    5087                 :            : {
    5088                 :          0 :         return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
    5089                 :            : }
    5090                 :            : SLAB_ATTR_RO(objects);
    5091                 :            : 
    5092                 :          0 : static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
    5093                 :            : {
    5094                 :          0 :         return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
    5095                 :            : }
    5096                 :            : SLAB_ATTR_RO(objects_partial);
    5097                 :            : 
    5098                 :          0 : static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
    5099                 :            : {
    5100                 :            :         int objects = 0;
    5101                 :            :         int pages = 0;
    5102                 :            :         int cpu;
    5103                 :            :         int len;
    5104                 :            : 
    5105                 :          0 :         for_each_online_cpu(cpu) {
    5106                 :            :                 struct page *page;
    5107                 :            : 
    5108                 :          0 :                 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
    5109                 :            : 
    5110                 :          0 :                 if (page) {
    5111                 :          0 :                         pages += page->pages;
    5112                 :          0 :                         objects += page->pobjects;
    5113                 :            :                 }
    5114                 :            :         }
    5115                 :            : 
    5116                 :          0 :         len = sprintf(buf, "%d(%d)", objects, pages);
    5117                 :            : 
    5118                 :            : #ifdef CONFIG_SMP
    5119                 :          0 :         for_each_online_cpu(cpu) {
    5120                 :            :                 struct page *page;
    5121                 :            : 
    5122                 :          0 :                 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
    5123                 :            : 
    5124                 :          0 :                 if (page && len < PAGE_SIZE - 20)
    5125                 :          0 :                         len += sprintf(buf + len, " C%d=%d(%d)", cpu,
    5126                 :          0 :                                 page->pobjects, page->pages);
    5127                 :            :         }
    5128                 :            : #endif
    5129                 :          0 :         return len + sprintf(buf + len, "\n");
    5130                 :            : }
    5131                 :            : SLAB_ATTR_RO(slabs_cpu_partial);
    5132                 :            : 
    5133                 :          0 : static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
    5134                 :            : {
    5135                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
    5136                 :            : }
    5137                 :            : 
    5138                 :          0 : static ssize_t reclaim_account_store(struct kmem_cache *s,
    5139                 :            :                                 const char *buf, size_t length)
    5140                 :            : {
    5141                 :          0 :         s->flags &= ~SLAB_RECLAIM_ACCOUNT;
    5142                 :          0 :         if (buf[0] == '1')
    5143                 :          0 :                 s->flags |= SLAB_RECLAIM_ACCOUNT;
    5144                 :          0 :         return length;
    5145                 :            : }
    5146                 :            : SLAB_ATTR(reclaim_account);
    5147                 :            : 
    5148                 :          0 : static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
    5149                 :            : {
    5150                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
    5151                 :            : }
    5152                 :            : SLAB_ATTR_RO(hwcache_align);
    5153                 :            : 
    5154                 :            : #ifdef CONFIG_ZONE_DMA
    5155                 :            : static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
    5156                 :            : {
    5157                 :            :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
    5158                 :            : }
    5159                 :            : SLAB_ATTR_RO(cache_dma);
    5160                 :            : #endif
    5161                 :            : 
    5162                 :          0 : static ssize_t usersize_show(struct kmem_cache *s, char *buf)
    5163                 :            : {
    5164                 :          0 :         return sprintf(buf, "%u\n", s->usersize);
    5165                 :            : }
    5166                 :            : SLAB_ATTR_RO(usersize);
    5167                 :            : 
    5168                 :          0 : static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
    5169                 :            : {
    5170                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
    5171                 :            : }
    5172                 :            : SLAB_ATTR_RO(destroy_by_rcu);
    5173                 :            : 
    5174                 :            : #ifdef CONFIG_SLUB_DEBUG
    5175                 :          0 : static ssize_t slabs_show(struct kmem_cache *s, char *buf)
    5176                 :            : {
    5177                 :          0 :         return show_slab_objects(s, buf, SO_ALL);
    5178                 :            : }
    5179                 :            : SLAB_ATTR_RO(slabs);
    5180                 :            : 
    5181                 :          0 : static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
    5182                 :            : {
    5183                 :          0 :         return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
    5184                 :            : }
    5185                 :            : SLAB_ATTR_RO(total_objects);
    5186                 :            : 
    5187                 :          0 : static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
    5188                 :            : {
    5189                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
    5190                 :            : }
    5191                 :            : 
    5192                 :          0 : static ssize_t sanity_checks_store(struct kmem_cache *s,
    5193                 :            :                                 const char *buf, size_t length)
    5194                 :            : {
    5195                 :          0 :         s->flags &= ~SLAB_CONSISTENCY_CHECKS;
    5196                 :          0 :         if (buf[0] == '1') {
    5197                 :          0 :                 s->flags &= ~__CMPXCHG_DOUBLE;
    5198                 :          0 :                 s->flags |= SLAB_CONSISTENCY_CHECKS;
    5199                 :            :         }
    5200                 :          0 :         return length;
    5201                 :            : }
    5202                 :            : SLAB_ATTR(sanity_checks);
    5203                 :            : 
    5204                 :          0 : static ssize_t trace_show(struct kmem_cache *s, char *buf)
    5205                 :            : {
    5206                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
    5207                 :            : }
    5208                 :            : 
    5209                 :          0 : static ssize_t trace_store(struct kmem_cache *s, const char *buf,
    5210                 :            :                                                         size_t length)
    5211                 :            : {
    5212                 :            :         /*
    5213                 :            :          * Tracing a merged cache is going to give confusing results
    5214                 :            :          * as well as cause other issues like converting a mergeable
    5215                 :            :          * cache into an unmergeable one.
    5216                 :            :          */
    5217                 :          0 :         if (s->refcount > 1)
    5218                 :            :                 return -EINVAL;
    5219                 :            : 
    5220                 :          0 :         s->flags &= ~SLAB_TRACE;
    5221                 :          0 :         if (buf[0] == '1') {
    5222                 :          0 :                 s->flags &= ~__CMPXCHG_DOUBLE;
    5223                 :          0 :                 s->flags |= SLAB_TRACE;
    5224                 :            :         }
    5225                 :          0 :         return length;
    5226                 :            : }
    5227                 :            : SLAB_ATTR(trace);
    5228                 :            : 
    5229                 :          0 : static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
    5230                 :            : {
    5231                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
    5232                 :            : }
    5233                 :            : 
    5234                 :          0 : static ssize_t red_zone_store(struct kmem_cache *s,
    5235                 :            :                                 const char *buf, size_t length)
    5236                 :            : {
    5237                 :          0 :         if (any_slab_objects(s))
    5238                 :            :                 return -EBUSY;
    5239                 :            : 
    5240                 :          0 :         s->flags &= ~SLAB_RED_ZONE;
    5241                 :          0 :         if (buf[0] == '1') {
    5242                 :          0 :                 s->flags |= SLAB_RED_ZONE;
    5243                 :            :         }
    5244                 :          0 :         calculate_sizes(s, -1);
    5245                 :          0 :         return length;
    5246                 :            : }
    5247                 :            : SLAB_ATTR(red_zone);
    5248                 :            : 
    5249                 :          0 : static ssize_t poison_show(struct kmem_cache *s, char *buf)
    5250                 :            : {
    5251                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
    5252                 :            : }
    5253                 :            : 
    5254                 :          0 : static ssize_t poison_store(struct kmem_cache *s,
    5255                 :            :                                 const char *buf, size_t length)
    5256                 :            : {
    5257                 :          0 :         if (any_slab_objects(s))
    5258                 :            :                 return -EBUSY;
    5259                 :            : 
    5260                 :          0 :         s->flags &= ~SLAB_POISON;
    5261                 :          0 :         if (buf[0] == '1') {
    5262                 :          0 :                 s->flags |= SLAB_POISON;
    5263                 :            :         }
    5264                 :          0 :         calculate_sizes(s, -1);
    5265                 :          0 :         return length;
    5266                 :            : }
    5267                 :            : SLAB_ATTR(poison);
    5268                 :            : 
    5269                 :          0 : static ssize_t store_user_show(struct kmem_cache *s, char *buf)
    5270                 :            : {
    5271                 :          0 :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
    5272                 :            : }
    5273                 :            : 
    5274                 :          0 : static ssize_t store_user_store(struct kmem_cache *s,
    5275                 :            :                                 const char *buf, size_t length)
    5276                 :            : {
    5277                 :          0 :         if (any_slab_objects(s))
    5278                 :            :                 return -EBUSY;
    5279                 :            : 
    5280                 :          0 :         s->flags &= ~SLAB_STORE_USER;
    5281                 :          0 :         if (buf[0] == '1') {
    5282                 :          0 :                 s->flags &= ~__CMPXCHG_DOUBLE;
    5283                 :          0 :                 s->flags |= SLAB_STORE_USER;
    5284                 :            :         }
    5285                 :          0 :         calculate_sizes(s, -1);
    5286                 :          0 :         return length;
    5287                 :            : }
    5288                 :            : SLAB_ATTR(store_user);
    5289                 :            : 
    5290                 :          0 : static ssize_t validate_show(struct kmem_cache *s, char *buf)
    5291                 :            : {
    5292                 :          0 :         return 0;
    5293                 :            : }
    5294                 :            : 
    5295                 :          0 : static ssize_t validate_store(struct kmem_cache *s,
    5296                 :            :                         const char *buf, size_t length)
    5297                 :            : {
    5298                 :            :         int ret = -EINVAL;
    5299                 :            : 
    5300                 :          0 :         if (buf[0] == '1') {
    5301                 :          0 :                 ret = validate_slab_cache(s);
    5302                 :          0 :                 if (ret >= 0)
    5303                 :          0 :                         ret = length;
    5304                 :            :         }
    5305                 :          0 :         return ret;
    5306                 :            : }
    5307                 :            : SLAB_ATTR(validate);
    5308                 :            : 
    5309                 :          0 : static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
    5310                 :            : {
    5311                 :          0 :         if (!(s->flags & SLAB_STORE_USER))
    5312                 :            :                 return -ENOSYS;
    5313                 :          0 :         return list_locations(s, buf, TRACK_ALLOC);
    5314                 :            : }
    5315                 :            : SLAB_ATTR_RO(alloc_calls);
    5316                 :            : 
    5317                 :          0 : static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
    5318                 :            : {
    5319                 :          0 :         if (!(s->flags & SLAB_STORE_USER))
    5320                 :            :                 return -ENOSYS;
    5321                 :          0 :         return list_locations(s, buf, TRACK_FREE);
    5322                 :            : }
    5323                 :            : SLAB_ATTR_RO(free_calls);
    5324                 :            : #endif /* CONFIG_SLUB_DEBUG */
    5325                 :            : 
    5326                 :            : #ifdef CONFIG_FAILSLAB
    5327                 :            : static ssize_t failslab_show(struct kmem_cache *s, char *buf)
    5328                 :            : {
    5329                 :            :         return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
    5330                 :            : }
    5331                 :            : 
    5332                 :            : static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
    5333                 :            :                                                         size_t length)
    5334                 :            : {
    5335                 :            :         if (s->refcount > 1)
    5336                 :            :                 return -EINVAL;
    5337                 :            : 
    5338                 :            :         s->flags &= ~SLAB_FAILSLAB;
    5339                 :            :         if (buf[0] == '1')
    5340                 :            :                 s->flags |= SLAB_FAILSLAB;
    5341                 :            :         return length;
    5342                 :            : }
    5343                 :            : SLAB_ATTR(failslab);
    5344                 :            : #endif
    5345                 :            : 
    5346                 :          0 : static ssize_t shrink_show(struct kmem_cache *s, char *buf)
    5347                 :            : {
    5348                 :          0 :         return 0;
    5349                 :            : }
    5350                 :            : 
    5351                 :          0 : static ssize_t shrink_store(struct kmem_cache *s,
    5352                 :            :                         const char *buf, size_t length)
    5353                 :            : {
    5354                 :          0 :         if (buf[0] == '1')
    5355                 :          0 :                 kmem_cache_shrink_all(s);
    5356                 :            :         else
    5357                 :            :                 return -EINVAL;
    5358                 :          0 :         return length;
    5359                 :            : }
    5360                 :            : SLAB_ATTR(shrink);
    5361                 :            : 
    5362                 :            : #ifdef CONFIG_NUMA
    5363                 :            : static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
    5364                 :            : {
    5365                 :            :         return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
    5366                 :            : }
    5367                 :            : 
    5368                 :            : static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
    5369                 :            :                                 const char *buf, size_t length)
    5370                 :            : {
    5371                 :            :         unsigned int ratio;
    5372                 :            :         int err;
    5373                 :            : 
    5374                 :            :         err = kstrtouint(buf, 10, &ratio);
    5375                 :            :         if (err)
    5376                 :            :                 return err;
    5377                 :            :         if (ratio > 100)
    5378                 :            :                 return -ERANGE;
    5379                 :            : 
    5380                 :            :         s->remote_node_defrag_ratio = ratio * 10;
    5381                 :            : 
    5382                 :            :         return length;
    5383                 :            : }
    5384                 :            : SLAB_ATTR(remote_node_defrag_ratio);
    5385                 :            : #endif
    5386                 :            : 
    5387                 :            : #ifdef CONFIG_SLUB_STATS
    5388                 :            : static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
    5389                 :            : {
    5390                 :            :         unsigned long sum  = 0;
    5391                 :            :         int cpu;
    5392                 :            :         int len;
    5393                 :            :         int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
    5394                 :            : 
    5395                 :            :         if (!data)
    5396                 :            :                 return -ENOMEM;
    5397                 :            : 
    5398                 :            :         for_each_online_cpu(cpu) {
    5399                 :            :                 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
    5400                 :            : 
    5401                 :            :                 data[cpu] = x;
    5402                 :            :                 sum += x;
    5403                 :            :         }
    5404                 :            : 
    5405                 :            :         len = sprintf(buf, "%lu", sum);
    5406                 :            : 
    5407                 :            : #ifdef CONFIG_SMP
    5408                 :            :         for_each_online_cpu(cpu) {
    5409                 :            :                 if (data[cpu] && len < PAGE_SIZE - 20)
    5410                 :            :                         len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
    5411                 :            :         }
    5412                 :            : #endif
    5413                 :            :         kfree(data);
    5414                 :            :         return len + sprintf(buf + len, "\n");
    5415                 :            : }
    5416                 :            : 
    5417                 :            : static void clear_stat(struct kmem_cache *s, enum stat_item si)
    5418                 :            : {
    5419                 :            :         int cpu;
    5420                 :            : 
    5421                 :            :         for_each_online_cpu(cpu)
    5422                 :            :                 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
    5423                 :            : }
    5424                 :            : 
    5425                 :            : #define STAT_ATTR(si, text)                                     \
    5426                 :            : static ssize_t text##_show(struct kmem_cache *s, char *buf)     \
    5427                 :            : {                                                               \
    5428                 :            :         return show_stat(s, buf, si);                           \
    5429                 :            : }                                                               \
    5430                 :            : static ssize_t text##_store(struct kmem_cache *s,               \
    5431                 :            :                                 const char *buf, size_t length) \
    5432                 :            : {                                                               \
    5433                 :            :         if (buf[0] != '0')                                      \
    5434                 :            :                 return -EINVAL;                                 \
    5435                 :            :         clear_stat(s, si);                                      \
    5436                 :            :         return length;                                          \
    5437                 :            : }                                                               \
    5438                 :            : SLAB_ATTR(text);                                                \
    5439                 :            : 
    5440                 :            : STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
    5441                 :            : STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
    5442                 :            : STAT_ATTR(FREE_FASTPATH, free_fastpath);
    5443                 :            : STAT_ATTR(FREE_SLOWPATH, free_slowpath);
    5444                 :            : STAT_ATTR(FREE_FROZEN, free_frozen);
    5445                 :            : STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
    5446                 :            : STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
    5447                 :            : STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
    5448                 :            : STAT_ATTR(ALLOC_SLAB, alloc_slab);
    5449                 :            : STAT_ATTR(ALLOC_REFILL, alloc_refill);
    5450                 :            : STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
    5451                 :            : STAT_ATTR(FREE_SLAB, free_slab);
    5452                 :            : STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
    5453                 :            : STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
    5454                 :            : STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
    5455                 :            : STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
    5456                 :            : STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
    5457                 :            : STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
    5458                 :            : STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
    5459                 :            : STAT_ATTR(ORDER_FALLBACK, order_fallback);
    5460                 :            : STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
    5461                 :            : STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
    5462                 :            : STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
    5463                 :            : STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
    5464                 :            : STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
    5465                 :            : STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
    5466                 :            : #endif  /* CONFIG_SLUB_STATS */
    5467                 :            : 
/*
 * All sysfs attributes of a slab cache, terminated by a NULL entry.
 *
 * Entries inside an #ifdef are present only when the matching config
 * option is enabled.  memcg_propagate_slab_attrs() walks this array by
 * index using ARRAY_SIZE().
 */
static struct attribute *slab_attrs[] = {
        &slab_size_attr.attr,
        &object_size_attr.attr,
        &objs_per_slab_attr.attr,
        &order_attr.attr,
        &min_partial_attr.attr,
        &cpu_partial_attr.attr,
        &objects_attr.attr,
        &objects_partial_attr.attr,
        &partial_attr.attr,
        &cpu_slabs_attr.attr,
        &ctor_attr.attr,
        &aliases_attr.attr,
        &align_attr.attr,
        &hwcache_align_attr.attr,
        &reclaim_account_attr.attr,
        &destroy_by_rcu_attr.attr,
        &shrink_attr.attr,
        &slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
        &total_objects_attr.attr,
        &slabs_attr.attr,
        &sanity_checks_attr.attr,
        &trace_attr.attr,
        &red_zone_attr.attr,
        &poison_attr.attr,
        &store_user_attr.attr,
        &validate_attr.attr,
        &alloc_calls_attr.attr,
        &free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
        &cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
        &remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
        /* One entry per STAT_ATTR() definition. */
        &alloc_fastpath_attr.attr,
        &alloc_slowpath_attr.attr,
        &free_fastpath_attr.attr,
        &free_slowpath_attr.attr,
        &free_frozen_attr.attr,
        &free_add_partial_attr.attr,
        &free_remove_partial_attr.attr,
        &alloc_from_partial_attr.attr,
        &alloc_slab_attr.attr,
        &alloc_refill_attr.attr,
        &alloc_node_mismatch_attr.attr,
        &free_slab_attr.attr,
        &cpuslab_flush_attr.attr,
        &deactivate_full_attr.attr,
        &deactivate_empty_attr.attr,
        &deactivate_to_head_attr.attr,
        &deactivate_to_tail_attr.attr,
        &deactivate_remote_frees_attr.attr,
        &deactivate_bypass_attr.attr,
        &order_fallback_attr.attr,
        &cmpxchg_double_fail_attr.attr,
        &cmpxchg_double_cpu_fail_attr.attr,
        &cpu_partial_alloc_attr.attr,
        &cpu_partial_free_attr.attr,
        &cpu_partial_node_attr.attr,
        &cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
        &failslab_attr.attr,
#endif
        &usersize_attr.attr,

        NULL
};

/* Attribute group that exposes slab_attrs as a unit. */
static const struct attribute_group slab_attr_group = {
        .attrs = slab_attrs,
};
    5544                 :            : 
    5545                 :          0 : static ssize_t slab_attr_show(struct kobject *kobj,
    5546                 :            :                                 struct attribute *attr,
    5547                 :            :                                 char *buf)
    5548                 :            : {
    5549                 :            :         struct slab_attribute *attribute;
    5550                 :            :         struct kmem_cache *s;
    5551                 :            :         int err;
    5552                 :            : 
    5553                 :            :         attribute = to_slab_attr(attr);
    5554                 :          0 :         s = to_slab(kobj);
    5555                 :            : 
    5556                 :          0 :         if (!attribute->show)
    5557                 :            :                 return -EIO;
    5558                 :            : 
    5559                 :          0 :         err = attribute->show(s, buf);
    5560                 :            : 
    5561                 :          0 :         return err;
    5562                 :            : }
    5563                 :            : 
    5564                 :          0 : static ssize_t slab_attr_store(struct kobject *kobj,
    5565                 :            :                                 struct attribute *attr,
    5566                 :            :                                 const char *buf, size_t len)
    5567                 :            : {
    5568                 :            :         struct slab_attribute *attribute;
    5569                 :            :         struct kmem_cache *s;
    5570                 :            :         int err;
    5571                 :            : 
    5572                 :            :         attribute = to_slab_attr(attr);
    5573                 :          0 :         s = to_slab(kobj);
    5574                 :            : 
    5575                 :          0 :         if (!attribute->store)
    5576                 :            :                 return -EIO;
    5577                 :            : 
    5578                 :          0 :         err = attribute->store(s, buf, len);
    5579                 :            : #ifdef CONFIG_MEMCG
    5580                 :          0 :         if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
    5581                 :            :                 struct kmem_cache *c;
    5582                 :            : 
    5583                 :          0 :                 mutex_lock(&slab_mutex);
    5584                 :          0 :                 if (s->max_attr_size < len)
    5585                 :          0 :                         s->max_attr_size = len;
    5586                 :            : 
    5587                 :            :                 /*
    5588                 :            :                  * This is a best effort propagation, so this function's return
    5589                 :            :                  * value will be determined by the parent cache only. This is
    5590                 :            :                  * basically because not all attributes will have a well
    5591                 :            :                  * defined semantics for rollbacks - most of the actions will
    5592                 :            :                  * have permanent effects.
    5593                 :            :                  *
    5594                 :            :                  * Returning the error value of any of the children that fail
    5595                 :            :                  * is not 100 % defined, in the sense that users seeing the
    5596                 :            :                  * error code won't be able to know anything about the state of
    5597                 :            :                  * the cache.
    5598                 :            :                  *
    5599                 :            :                  * Only returning the error code for the parent cache at least
    5600                 :            :                  * has well defined semantics. The cache being written to
    5601                 :            :                  * directly either failed or succeeded, in which case we loop
    5602                 :            :                  * through the descendants with best-effort propagation.
    5603                 :            :                  */
    5604                 :          0 :                 for_each_memcg_cache(c, s)
    5605                 :          0 :                         attribute->store(c, buf, len);
    5606                 :          0 :                 mutex_unlock(&slab_mutex);
    5607                 :            :         }
    5608                 :            : #endif
    5609                 :          0 :         return err;
    5610                 :            : }
    5611                 :            : 
    5612                 :          3 : static void memcg_propagate_slab_attrs(struct kmem_cache *s)
    5613                 :            : {
    5614                 :            : #ifdef CONFIG_MEMCG
    5615                 :            :         int i;
    5616                 :            :         char *buffer = NULL;
    5617                 :            :         struct kmem_cache *root_cache;
    5618                 :            : 
    5619                 :          3 :         if (is_root_cache(s))
    5620                 :            :                 return;
    5621                 :            : 
    5622                 :            :         root_cache = s->memcg_params.root_cache;
    5623                 :            : 
    5624                 :            :         /*
    5625                 :            :          * This mean this cache had no attribute written. Therefore, no point
    5626                 :            :          * in copying default values around
    5627                 :            :          */
    5628                 :          0 :         if (!root_cache->max_attr_size)
    5629                 :            :                 return;
    5630                 :            : 
    5631                 :          0 :         for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
    5632                 :            :                 char mbuf[64];
    5633                 :            :                 char *buf;
    5634                 :          0 :                 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
    5635                 :            :                 ssize_t len;
    5636                 :            : 
    5637                 :          0 :                 if (!attr || !attr->store || !attr->show)
    5638                 :          0 :                         continue;
    5639                 :            : 
    5640                 :            :                 /*
    5641                 :            :                  * It is really bad that we have to allocate here, so we will
    5642                 :            :                  * do it only as a fallback. If we actually allocate, though,
    5643                 :            :                  * we can just use the allocated buffer until the end.
    5644                 :            :                  *
    5645                 :            :                  * Most of the slub attributes will tend to be very small in
    5646                 :            :                  * size, but sysfs allows buffers up to a page, so they can
    5647                 :            :                  * theoretically happen.
    5648                 :            :                  */
    5649                 :          0 :                 if (buffer)
    5650                 :            :                         buf = buffer;
    5651                 :          0 :                 else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
    5652                 :            :                          !IS_ENABLED(CONFIG_SLUB_STATS))
    5653                 :            :                         buf = mbuf;
    5654                 :            :                 else {
    5655                 :          0 :                         buffer = (char *) get_zeroed_page(GFP_KERNEL);
    5656                 :          0 :                         if (WARN_ON(!buffer))
    5657                 :          0 :                                 continue;
    5658                 :            :                         buf = buffer;
    5659                 :            :                 }
    5660                 :            : 
    5661                 :          0 :                 len = attr->show(root_cache, buf);
    5662                 :          0 :                 if (len > 0)
    5663                 :          0 :                         attr->store(s, buf, len);
    5664                 :            :         }
    5665                 :            : 
    5666                 :          0 :         if (buffer)
    5667                 :          0 :                 free_page((unsigned long)buffer);
    5668                 :            : #endif  /* CONFIG_MEMCG */
    5669                 :            : }
    5670                 :            : 
    5671                 :          0 : static void kmem_cache_release(struct kobject *k)
    5672                 :            : {
    5673                 :          0 :         slab_kmem_cache_release(to_slab(k));
    5674                 :          0 : }
    5675                 :            : 
    5676                 :            : static const struct sysfs_ops slab_sysfs_ops = {
    5677                 :            :         .show = slab_attr_show,
    5678                 :            :         .store = slab_attr_store,
    5679                 :            : };
    5680                 :            : 
    5681                 :            : static struct kobj_type slab_ktype = {
    5682                 :            :         .sysfs_ops = &slab_sysfs_ops,
    5683                 :            :         .release = kmem_cache_release,
    5684                 :            : };
    5685                 :            : 
    5686                 :          3 : static int uevent_filter(struct kset *kset, struct kobject *kobj)
    5687                 :            : {
    5688                 :            :         struct kobj_type *ktype = get_ktype(kobj);
    5689                 :            : 
    5690                 :          3 :         if (ktype == &slab_ktype)
    5691                 :            :                 return 1;
    5692                 :          0 :         return 0;
    5693                 :            : }
    5694                 :            : 
    5695                 :            : static const struct kset_uevent_ops slab_uevent_ops = {
    5696                 :            :         .filter = uevent_filter,
    5697                 :            : };
    5698                 :            : 
    5699                 :            : static struct kset *slab_kset;
    5700                 :            : 
    5701                 :            : static inline struct kset *cache_kset(struct kmem_cache *s)
    5702                 :            : {
    5703                 :            : #ifdef CONFIG_MEMCG
    5704                 :          3 :         if (!is_root_cache(s))
    5705                 :          0 :                 return s->memcg_params.root_cache->memcg_kset;
    5706                 :            : #endif
    5707                 :          3 :         return slab_kset;
    5708                 :            : }
    5709                 :            : 
    5710                 :            : #define ID_STR_LENGTH 64
    5711                 :            : 
    5712                 :            : /* Create a unique string id for a slab cache:
    5713                 :            :  *
    5714                 :            :  * Format       :[flags-]size
    5715                 :            :  */
    5716                 :          3 : static char *create_unique_id(struct kmem_cache *s)
    5717                 :            : {
    5718                 :            :         char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
    5719                 :            :         char *p = name;
    5720                 :            : 
    5721                 :          3 :         BUG_ON(!name);
    5722                 :            : 
    5723                 :          3 :         *p++ = ':';
    5724                 :            :         /*
    5725                 :            :          * First flags affecting slabcache operations. We will only
    5726                 :            :          * get here for aliasable slabs so we do not need to support
    5727                 :            :          * too many flags. The flags here must cover all flags that
    5728                 :            :          * are matched during merging to guarantee that the id is
    5729                 :            :          * unique.
    5730                 :            :          */
    5731                 :          3 :         if (s->flags & SLAB_CACHE_DMA)
    5732                 :          0 :                 *p++ = 'd';
    5733                 :          3 :         if (s->flags & SLAB_CACHE_DMA32)
    5734                 :          0 :                 *p++ = 'D';
    5735                 :          3 :         if (s->flags & SLAB_RECLAIM_ACCOUNT)
    5736                 :          3 :                 *p++ = 'a';
    5737                 :          3 :         if (s->flags & SLAB_CONSISTENCY_CHECKS)
    5738                 :          0 :                 *p++ = 'F';
    5739                 :          3 :         if (s->flags & SLAB_ACCOUNT)
    5740                 :          3 :                 *p++ = 'A';
    5741                 :          3 :         if (p != name + 1)
    5742                 :          3 :                 *p++ = '-';
    5743                 :          3 :         p += sprintf(p, "%07u", s->size);
    5744                 :            : 
    5745                 :          3 :         BUG_ON(p > name + ID_STR_LENGTH - 1);
    5746                 :          3 :         return name;
    5747                 :            : }
    5748                 :            : 
    5749                 :          0 : static void sysfs_slab_remove_workfn(struct work_struct *work)
    5750                 :            : {
    5751                 :            :         struct kmem_cache *s =
    5752                 :            :                 container_of(work, struct kmem_cache, kobj_remove_work);
    5753                 :            : 
    5754                 :          0 :         if (!s->kobj.state_in_sysfs)
    5755                 :            :                 /*
    5756                 :            :                  * For a memcg cache, this may be called during
    5757                 :            :                  * deactivation and again on shutdown.  Remove only once.
    5758                 :            :                  * A cache is never shut down before deactivation is
    5759                 :            :                  * complete, so no need to worry about synchronization.
    5760                 :            :                  */
    5761                 :            :                 goto out;
    5762                 :            : 
    5763                 :            : #ifdef CONFIG_MEMCG
    5764                 :          0 :         kset_unregister(s->memcg_kset);
    5765                 :            : #endif
    5766                 :          0 :         kobject_uevent(&s->kobj, KOBJ_REMOVE);
    5767                 :            : out:
    5768                 :          0 :         kobject_put(&s->kobj);
    5769                 :          0 : }
    5770                 :            : 
    5771                 :          3 : static int sysfs_slab_add(struct kmem_cache *s)
    5772                 :            : {
    5773                 :            :         int err;
    5774                 :            :         const char *name;
    5775                 :            :         struct kset *kset = cache_kset(s);
    5776                 :          3 :         int unmergeable = slab_unmergeable(s);
    5777                 :            : 
    5778                 :          3 :         INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
    5779                 :            : 
    5780                 :          3 :         if (!kset) {
    5781                 :          0 :                 kobject_init(&s->kobj, &slab_ktype);
    5782                 :          0 :                 return 0;
    5783                 :            :         }
    5784                 :            : 
    5785                 :          3 :         if (!unmergeable && disable_higher_order_debug &&
    5786                 :          0 :                         (slub_debug & DEBUG_METADATA_FLAGS))
    5787                 :            :                 unmergeable = 1;
    5788                 :            : 
    5789                 :          3 :         if (unmergeable) {
    5790                 :            :                 /*
    5791                 :            :                  * Slabcache can never be merged so we can use the name proper.
    5792                 :            :                  * This is typically the case for debug situations. In that
    5793                 :            :                  * case we can catch duplicate names easily.
    5794                 :            :                  */
    5795                 :          3 :                 sysfs_remove_link(&slab_kset->kobj, s->name);
    5796                 :          3 :                 name = s->name;
    5797                 :            :         } else {
    5798                 :            :                 /*
    5799                 :            :                  * Create a unique name for the slab as a target
    5800                 :            :                  * for the symlinks.
    5801                 :            :                  */
    5802                 :          3 :                 name = create_unique_id(s);
    5803                 :            :         }
    5804                 :            : 
    5805                 :          3 :         s->kobj.kset = kset;
    5806                 :          3 :         err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
    5807                 :          3 :         if (err) {
    5808                 :          0 :                 kobject_put(&s->kobj);
    5809                 :          0 :                 goto out;
    5810                 :            :         }
    5811                 :            : 
    5812                 :          3 :         err = sysfs_create_group(&s->kobj, &slab_attr_group);
    5813                 :          3 :         if (err)
    5814                 :            :                 goto out_del_kobj;
    5815                 :            : 
    5816                 :            : #ifdef CONFIG_MEMCG
    5817                 :          3 :         if (is_root_cache(s) && memcg_sysfs_enabled) {
    5818                 :          0 :                 s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
    5819                 :          0 :                 if (!s->memcg_kset) {
    5820                 :            :                         err = -ENOMEM;
    5821                 :            :                         goto out_del_kobj;
    5822                 :            :                 }
    5823                 :            :         }
    5824                 :            : #endif
    5825                 :            : 
    5826                 :          3 :         kobject_uevent(&s->kobj, KOBJ_ADD);
    5827                 :          3 :         if (!unmergeable) {
    5828                 :            :                 /* Setup first alias */
    5829                 :          3 :                 sysfs_slab_alias(s, s->name);
    5830                 :            :         }
    5831                 :            : out:
    5832                 :          3 :         if (!unmergeable)
    5833                 :          3 :                 kfree(name);
    5834                 :          3 :         return err;
    5835                 :            : out_del_kobj:
    5836                 :          0 :         kobject_del(&s->kobj);
    5837                 :          0 :         goto out;
    5838                 :            : }
    5839                 :            : 
    5840                 :          0 : static void sysfs_slab_remove(struct kmem_cache *s)
    5841                 :            : {
    5842                 :          0 :         if (slab_state < FULL)
    5843                 :            :                 /*
    5844                 :            :                  * Sysfs has not been setup yet so no need to remove the
    5845                 :            :                  * cache from sysfs.
    5846                 :            :                  */
    5847                 :          0 :                 return;
    5848                 :            : 
    5849                 :          0 :         kobject_get(&s->kobj);
    5850                 :          0 :         schedule_work(&s->kobj_remove_work);
    5851                 :            : }
    5852                 :            : 
    5853                 :          0 : void sysfs_slab_unlink(struct kmem_cache *s)
    5854                 :            : {
    5855                 :          0 :         if (slab_state >= FULL)
    5856                 :          0 :                 kobject_del(&s->kobj);
    5857                 :          0 : }
    5858                 :            : 
    5859                 :          0 : void sysfs_slab_release(struct kmem_cache *s)
    5860                 :            : {
    5861                 :          0 :         if (slab_state >= FULL)
    5862                 :          0 :                 kobject_put(&s->kobj);
    5863                 :          0 : }
    5864                 :            : 
    5865                 :            : /*
    5866                 :            :  * Need to buffer aliases during bootup until sysfs becomes
    5867                 :            :  * available lest we lose that information.
    5868                 :            :  */
    5869                 :            : struct saved_alias {
    5870                 :            :         struct kmem_cache *s;
    5871                 :            :         const char *name;
    5872                 :            :         struct saved_alias *next;
    5873                 :            : };
    5874                 :            : 
    5875                 :            : static struct saved_alias *alias_list;
    5876                 :            : 
    5877                 :          3 : static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
    5878                 :            : {
    5879                 :            :         struct saved_alias *al;
    5880                 :            : 
    5881                 :          3 :         if (slab_state == FULL) {
    5882                 :            :                 /*
    5883                 :            :                  * If we have a leftover link then remove it.
    5884                 :            :                  */
    5885                 :          3 :                 sysfs_remove_link(&slab_kset->kobj, name);
    5886                 :          3 :                 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
    5887                 :            :         }
    5888                 :            : 
    5889                 :            :         al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
    5890                 :          3 :         if (!al)
    5891                 :            :                 return -ENOMEM;
    5892                 :            : 
    5893                 :          3 :         al->s = s;
    5894                 :          3 :         al->name = name;
    5895                 :          3 :         al->next = alias_list;
    5896                 :          3 :         alias_list = al;
    5897                 :          3 :         return 0;
    5898                 :            : }
    5899                 :            : 
    5900                 :          3 : static int __init slab_sysfs_init(void)
    5901                 :            : {
    5902                 :            :         struct kmem_cache *s;
    5903                 :            :         int err;
    5904                 :            : 
    5905                 :          3 :         mutex_lock(&slab_mutex);
    5906                 :            : 
    5907                 :          3 :         slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
    5908                 :          3 :         if (!slab_kset) {
    5909                 :          0 :                 mutex_unlock(&slab_mutex);
    5910                 :          0 :                 pr_err("Cannot register slab subsystem.\n");
    5911                 :          0 :                 return -ENOSYS;
    5912                 :            :         }
    5913                 :            : 
    5914                 :          3 :         slab_state = FULL;
    5915                 :            : 
    5916                 :          3 :         list_for_each_entry(s, &slab_caches, list) {
    5917                 :          3 :                 err = sysfs_slab_add(s);
    5918                 :          3 :                 if (err)
    5919                 :          0 :                         pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
    5920                 :            :                                s->name);
    5921                 :            :         }
    5922                 :            : 
    5923                 :          3 :         while (alias_list) {
    5924                 :            :                 struct saved_alias *al = alias_list;
    5925                 :            : 
    5926                 :          3 :                 alias_list = alias_list->next;
    5927                 :          3 :                 err = sysfs_slab_alias(al->s, al->name);
    5928                 :          3 :                 if (err)
    5929                 :          0 :                         pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
    5930                 :            :                                al->name);
    5931                 :          3 :                 kfree(al);
    5932                 :            :         }
    5933                 :            : 
    5934                 :          3 :         mutex_unlock(&slab_mutex);
    5935                 :            :         resiliency_test();
    5936                 :          3 :         return 0;
    5937                 :            : }
    5938                 :            : 
    5939                 :            : __initcall(slab_sysfs_init);
    5940                 :            : #endif /* CONFIG_SYSFS */
    5941                 :            : 
    5942                 :            : /*
    5943                 :            :  * The /proc/slabinfo ABI
    5944                 :            :  */
    5945                 :            : #ifdef CONFIG_SLUB_DEBUG
    5946                 :          0 : void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
    5947                 :            : {
    5948                 :            :         unsigned long nr_slabs = 0;
    5949                 :            :         unsigned long nr_objs = 0;
    5950                 :            :         unsigned long nr_free = 0;
    5951                 :            :         int node;
    5952                 :            :         struct kmem_cache_node *n;
    5953                 :            : 
    5954                 :          0 :         for_each_kmem_cache_node(s, node, n) {
    5955                 :          0 :                 nr_slabs += node_nr_slabs(n);
    5956                 :          0 :                 nr_objs += node_nr_objs(n);
    5957                 :          0 :                 nr_free += count_partial(n, count_free);
    5958                 :            :         }
    5959                 :            : 
    5960                 :          0 :         sinfo->active_objs = nr_objs - nr_free;
    5961                 :          0 :         sinfo->num_objs = nr_objs;
    5962                 :          0 :         sinfo->active_slabs = nr_slabs;
    5963                 :          0 :         sinfo->num_slabs = nr_slabs;
    5964                 :          0 :         sinfo->objects_per_slab = oo_objects(s->oo);
    5965                 :          0 :         sinfo->cache_order = oo_order(s->oo);
    5966                 :          0 : }
    5967                 :            : 
    5968                 :          0 : void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
    5969                 :            : {
    5970                 :          0 : }
    5971                 :            : 
    5972                 :          0 : ssize_t slabinfo_write(struct file *file, const char __user *buffer,
    5973                 :            :                        size_t count, loff_t *ppos)
    5974                 :            : {
    5975                 :          0 :         return -EIO;
    5976                 :            : }
    5977                 :            : #endif /* CONFIG_SLUB_DEBUG */

Generated by: LCOV version 1.14