1. 12 Jun, 2018 1 commit
    • Kees Cook's avatar
      treewide: kzalloc() -> kcalloc() · 6396bb22
      Kees Cook authored
      The kzalloc() function has a 2-factor argument form, kcalloc(). This
      patch replaces cases of:
      
              kzalloc(a * b, gfp)
      
      with:
              kcalloc(a * b, gfp)
      
      as well as handling cases of:
      
              kzalloc(a * b * c, gfp)
      
      with:
      
              kzalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kzalloc_array(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kzalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kzalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kzalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kzalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kzalloc
      + kcalloc
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kzalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kzalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kzalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kzalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kzalloc(C1 * C2 * C3, ...)
      |
        kzalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kzalloc(sizeof(THING) * C2, ...)
      |
        kzalloc(sizeof(TYPE) * C2, ...)
      |
        kzalloc(C1 * C2 * C3, ...)
      |
        kzalloc(C1 * C2, ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: 's avatarKees Cook <keescook@chromium.org>
      6396bb22
  2. 08 Jun, 2018 2 commits
  3. 14 Apr, 2018 1 commit
    • Vlastimil Babka's avatar
      mm, slab: reschedule cache_reap() on the same CPU · a9f2a846
      Vlastimil Babka authored
      cache_reap() is initially scheduled in start_cpu_timer() via
      schedule_delayed_work_on(). But then the next iterations are scheduled
      via schedule_delayed_work(), i.e. using WORK_CPU_UNBOUND.
      
      Thus since commit ef557180 ("workqueue: schedule WORK_CPU_UNBOUND
      work on wq_unbound_cpumask CPUs") there is no guarantee the future
      iterations will run on the originally intended cpu, although it's still
      preferred.  I was able to demonstrate this with
      /sys/module/workqueue/parameters/debug_force_rr_cpu.  IIUC, it may also
      happen due to migrating timers in nohz context.  As a result, some cpu's
      would be calling cache_reap() more frequently and others never.
      
      This patch uses schedule_delayed_work_on() with the current cpu when
      scheduling the next iteration.
      
      Link: http://lkml.kernel.org/r/20180411070007.32225-1-vbabka@suse.cz
      Fixes: ef557180 ("workqueue: schedule WORK_CPU_UNBOUND work on wq_unbound_cpumask CPUs")
      Signed-off-by: 's avatarVlastimil Babka <vbabka@suse.cz>
      Acked-by: 's avatarPekka Enberg <penberg@kernel.org>
      Acked-by: 's avatarChristoph Lameter <cl@linux.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Lai Jiangshan <jiangshanlai@gmail.com>
      Cc: John Stultz <john.stultz@linaro.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Stephen Boyd <sboyd@kernel.org>
      Cc: <stable@vger.kernel.org>
      Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
      a9f2a846
  4. 06 Apr, 2018 4 commits
  5. 28 Mar, 2018 1 commit
    • Shakeel Butt's avatar
      mm, slab: memcg_link the SLAB's kmem_cache · 880cd276
      Shakeel Butt authored
      All the root caches are linked into slab_root_caches which was
      introduced by the commit 510ded33 ("slab: implement slab_root_caches
      list") but it missed to add the SLAB's kmem_cache.
      
      While experimenting with opt-in/opt-out kmem accounting, I noticed
      system crashes due to NULL dereference inside cache_from_memcg_idx()
      while deferencing kmem_cache.memcg_params.memcg_caches.  The upstream
      clean kernel will not see these crashes but SLAB should be consistent
      with SLUB which does linked its boot caches (kmem_cache_node and
      kmem_cache) into slab_root_caches.
      
      Link: http://lkml.kernel.org/r/20180319210020.60289-1-shakeelb@google.com
      Fixes: 510ded33 ("slab: implement slab_root_caches list")
      Signed-off-by: 's avatarShakeel Butt <shakeelb@google.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
      Cc: Greg Thelen <gthelen@google.com>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Michal Hocko <mhocko@kernel.org>
      Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
      880cd276
  6. 07 Feb, 2018 1 commit
  7. 01 Feb, 2018 1 commit
  8. 15 Jan, 2018 5 commits
    • David Windsor's avatar
      usercopy: Mark kmalloc caches as usercopy caches · 6c0c21ad
      David Windsor authored
      Mark the kmalloc slab caches as entirely whitelisted. These caches
      are frequently used to fulfill kernel allocations that contain data
      to be copied to/from userspace. Internal-only uses are also common,
      but are scattered in the kernel. For now, mark all the kmalloc caches
      as whitelisted.
      
      This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY
      whitelisting code in the last public patch of grsecurity/PaX based on my
      understanding of the code. Changes or omissions from the original code are
      mine and don't reflect the original grsecurity/PaX code.
      Signed-off-by: 's avatarDavid Windsor <dave@nullcore.net>
      [kees: merged in moved kmalloc hunks, adjust commit log]
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: linux-mm@kvack.org
      Cc: linux-xfs@vger.kernel.org
      Signed-off-by: 's avatarKees Cook <keescook@chromium.org>
      Acked-by: 's avatarChristoph Lameter <cl@linux.com>
      6c0c21ad
    • Kees Cook's avatar
      usercopy: Allow strict enforcement of whitelists · 2d891fbc
      Kees Cook authored
      This introduces CONFIG_HARDENED_USERCOPY_FALLBACK to control the
      behavior of hardened usercopy whitelist violations. By default, whitelist
      violations will continue to WARN() so that any bad or missing usercopy
      whitelists can be discovered without being too disruptive.
      
      If this config is disabled at build time or a system is booted with
      "slab_common.usercopy_fallback=0", usercopy whitelists will BUG() instead
      of WARN(). This is useful for admins that want to use usercopy whitelists
      immediately.
      Suggested-by: 's avatarMatthew Garrett <mjg59@google.com>
      Signed-off-by: 's avatarKees Cook <keescook@chromium.org>
      2d891fbc
    • Kees Cook's avatar
      usercopy: WARN() on slab cache usercopy region violations · afcc90f8
      Kees Cook authored
      This patch adds checking of usercopy cache whitelisting, and is modified
      from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the
      last public patch of grsecurity/PaX based on my understanding of the
      code. Changes or omissions from the original code are mine and don't
      reflect the original grsecurity/PaX code.
      
      The SLAB and SLUB allocators are modified to WARN() on all copy operations
      in which the kernel heap memory being modified falls outside of the cache's
      defined usercopy region.
      
      Based on an earlier patch from David Windsor.
      
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: Laura Abbott <labbott@redhat.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: linux-mm@kvack.org
      Cc: linux-xfs@vger.kernel.org
      Signed-off-by: 's avatarKees Cook <keescook@chromium.org>
      afcc90f8
    • David Windsor's avatar
      usercopy: Prepare for usercopy whitelisting · 8eb8284b
      David Windsor authored
      This patch prepares the slab allocator to handle caches having annotations
      (useroffset and usersize) defining usercopy regions.
      
      This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY
      whitelisting code in the last public patch of grsecurity/PaX based on
      my understanding of the code. Changes or omissions from the original
      code are mine and don't reflect the original grsecurity/PaX code.
      
      Currently, hardened usercopy performs dynamic bounds checking on slab
      cache objects. This is good, but still leaves a lot of kernel memory
      available to be copied to/from userspace in the face of bugs. To further
      restrict what memory is available for copying, this creates a way to
      whitelist specific areas of a given slab cache object for copying to/from
      userspace, allowing much finer granularity of access control. Slab caches
      that are never exposed to userspace can declare no whitelist for their
      objects, thereby keeping them unavailable to userspace via dynamic copy
      operations. (Note, an implicit form of whitelisting is the use of constant
      sizes in usercopy operations and get_user()/put_user(); these bypass
      hardened usercopy checks since these sizes cannot change at runtime.)
      
      To support this whitelist annotation, usercopy region offset and size
      members are added to struct kmem_cache. The slab allocator receives a
      new function, kmem_cache_create_usercopy(), that creates a new cache
      with a usercopy region defined, suitable for declaring spans of fields
      within the objects that get copied to/from userspace.
      
      In this patch, the default kmem_cache_create() marks the entire allocation
      as whitelisted, leaving it semantically unchanged. Once all fine-grained
      whitelists have been added (in subsequent patches), this will be changed
      to a usersize of 0, making caches created with kmem_cache_create() not
      copyable to/from userspace.
      
      After the entire usercopy whitelist series is applied, less than 15%
      of the slab cache memory remains exposed to potential usercopy bugs
      after a fresh boot:
      
      Total Slab Memory:           48074720
      Usercopyable Memory:          6367532  13.2%
               task_struct                    0.2%         4480/1630720
               RAW                            0.3%            300/96000
               RAWv6                          2.1%           1408/64768
               ext4_inode_cache               3.0%       269760/8740224
               dentry                        11.1%       585984/5273856
               mm_struct                     29.1%         54912/188448
               kmalloc-8                    100.0%          24576/24576
               kmalloc-16                   100.0%          28672/28672
               kmalloc-32                   100.0%          81920/81920
               kmalloc-192                  100.0%          96768/96768
               kmalloc-128                  100.0%        143360/143360
               names_cache                  100.0%        163840/163840
               kmalloc-64                   100.0%        167936/167936
               kmalloc-256                  100.0%        339968/339968
               kmalloc-512                  100.0%        350720/350720
               kmalloc-96                   100.0%        455616/455616
               kmalloc-8192                 100.0%        655360/655360
               kmalloc-1024                 100.0%        812032/812032
               kmalloc-4096                 100.0%        819200/819200
               kmalloc-2048                 100.0%      1310720/1310720
      
      After some kernel build workloads, the percentage (mainly driven by
      dentry and inode caches expanding) drops under 10%:
      
      Total Slab Memory:           95516184
      Usercopyable Memory:          8497452   8.8%
               task_struct                    0.2%         4000/1456000
               RAW                            0.3%            300/96000
               RAWv6                          2.1%           1408/64768
               ext4_inode_cache               3.0%     1217280/39439872
               dentry                        11.1%     1623200/14608800
               mm_struct                     29.1%         73216/251264
               kmalloc-8                    100.0%          24576/24576
               kmalloc-16                   100.0%          28672/28672
               kmalloc-32                   100.0%          94208/94208
               kmalloc-192                  100.0%          96768/96768
               kmalloc-128                  100.0%        143360/143360
               names_cache                  100.0%        163840/163840
               kmalloc-64                   100.0%        245760/245760
               kmalloc-256                  100.0%        339968/339968
               kmalloc-512                  100.0%        350720/350720
               kmalloc-96                   100.0%        563520/563520
               kmalloc-8192                 100.0%        655360/655360
               kmalloc-1024                 100.0%        794624/794624
               kmalloc-4096                 100.0%        819200/819200
               kmalloc-2048                 100.0%      1257472/1257472
      Signed-off-by: 's avatarDavid Windsor <dave@nullcore.net>
      [kees: adjust commit log, split out a few extra kmalloc hunks]
      [kees: add field names to function declarations]
      [kees: convert BUGs to WARNs and fail closed]
      [kees: add attack surface reduction analysis to commit log]
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: linux-mm@kvack.org
      Cc: linux-xfs@vger.kernel.org
      Signed-off-by: 's avatarKees Cook <keescook@chromium.org>
      Acked-by: 's avatarChristoph Lameter <cl@linux.com>
      8eb8284b
    • Kees Cook's avatar
      usercopy: Include offset in hardened usercopy report · f4e6e289
      Kees Cook authored
      This refactors the hardened usercopy code so that failure reporting can
      happen within the checking functions instead of at the top level. This
      simplifies the return value handling and allows more details and offsets
      to be included in the report. Having the offset can be much more helpful
      in understanding hardened usercopy bugs.
      Signed-off-by: 's avatarKees Cook <keescook@chromium.org>
      f4e6e289
  9. 15 Dec, 2017 1 commit
  10. 16 Nov, 2017 6 commits
  11. 02 Nov, 2017 1 commit
    • Greg Kroah-Hartman's avatar
      License cleanup: add SPDX GPL-2.0 license identifier to files with no license · b2441318
      Greg Kroah-Hartman authored
      Many source files in the tree are missing licensing information, which
      makes it harder for compliance tools to determine the correct license.
      
      By default all files without license information are under the default
      license of the kernel, which is GPL version 2.
      
      Update the files which contain no license information with the 'GPL-2.0'
      SPDX license identifier.  The SPDX identifier is a legally binding
      shorthand, which can be used instead of the full boiler plate text.
      
      This patch is based on work done by Thomas Gleixner and Kate Stewart and
      Philippe Ombredanne.
      
      How this work was done:
      
      Patches were generated and checked against linux-4.14-rc6 for a subset of
      the use cases:
       - file had no licensing information it it.
       - file was a */uapi/* one with no licensing information in it,
       - file was a */uapi/* one with existing licensing information,
      
      Further patches will be generated in subsequent months to fix up cases
      where non-standard license headers were used, and references to license
      had to be inferred by heuristics based on keywords.
      
      The analysis to determine which SPDX License Identifier to be applied to
      a file was done in a spreadsheet of side by side results from of the
      output of two independent scanners (ScanCode & Windriver) producing SPDX
      tag:value files created by Philippe Ombredanne.  Philippe prepared the
      base worksheet, and did an initial spot review of a few 1000 files.
      
      The 4.13 kernel was the starting point of the analysis with 60,537 files
      assessed.  Kate Stewart did a file by file comparison of the scanner
      results in the spreadsheet to determine which SPDX license identifier(s)
      to be applied to the file. She confirmed any determination that was not
      immediately clear with lawyers working with the Linux Foundation.
      
      Criteria used to select files for SPDX license identifier tagging was:
       - Files considered eligible had to be source code files.
       - Make and config files were included as candidates if they contained >5
         lines of source
       - File already had some variant of a license header in it (even if <5
         lines).
      
      All documentation files were explicitly excluded.
      
      The following heuristics were used to determine which SPDX license
      identifiers to apply.
      
       - when both scanners couldn't find any license traces, file was
         considered to have no license information in it, and the top level
         COPYING file license applied.
      
         For non */uapi/* files that summary was:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|-------
         GPL-2.0                                              11139
      
         and resulted in the first patch in this series.
      
         If that file was a */uapi/* path one, it was "GPL-2.0 WITH
         Linux-syscall-note" otherwise it was "GPL-2.0".  Results of that was:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|-------
         GPL-2.0 WITH Linux-syscall-note                        930
      
         and resulted in the second patch in this series.
      
       - if a file had some form of licensing information in it, and was one
         of the */uapi/* ones, it was denoted with the Linux-syscall-note if
         any GPL family license was found in the file or had no licensing in
         it (per prior point).  Results summary:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|------
         GPL-2.0 WITH Linux-syscall-note                       270
         GPL-2.0+ WITH Linux-syscall-note                      169
         ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)    21
         ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)    17
         LGPL-2.1+ WITH Linux-syscall-note                      15
         GPL-1.0+ WITH Linux-syscall-note                       14
         ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)    5
         LGPL-2.0+ WITH Linux-syscall-note                       4
         LGPL-2.1 WITH Linux-syscall-note                        3
         ((GPL-2.0 WITH Linux-syscall-note) OR MIT)              3
         ((GPL-2.0 WITH Linux-syscall-note) AND MIT)             1
      
         and that resulted in the third patch in this series.
      
       - when the two scanners agreed on the detected license(s), that became
         the concluded license(s).
      
       - when there was disagreement between the two scanners (one detected a
         license but the other didn't, or they both detected different
         licenses) a manual inspection of the file occurred.
      
       - In most cases a manual inspection of the information in the file
         resulted in a clear resolution of the license that should apply (and
         which scanner probably needed to revisit its heuristics).
      
       - When it was not immediately clear, the license identifier was
         confirmed with lawyers working with the Linux Foundation.
      
       - If there was any question as to the appropriate license identifier,
         the file was flagged for further research and to be revisited later
         in time.
      
      In total, over 70 hours of logged manual review was done on the
      spreadsheet to determine the SPDX license identifiers to apply to the
      source files by Kate, Philippe, Thomas and, in some cases, confirmation
      by lawyers working with the Linux Foundation.
      
      Kate also obtained a third independent scan of the 4.13 code base from
      FOSSology, and compared selected files where the other two scanners
      disagreed against that SPDX file, to see if there was new insights.  The
      Windriver scanner is based on an older version of FOSSology in part, so
      they are related.
      
      Thomas did random spot checks in about 500 files from the spreadsheets
      for the uapi headers and agreed with SPDX license identifier in the
      files he inspected. For the non-uapi files Thomas did random spot checks
      in about 15000 files.
      
      In initial set of patches against 4.14-rc6, 3 files were found to have
      copy/paste license identifier errors, and have been fixed to reflect the
      correct identifier.
      
      Additionally Philippe spent 10 hours this week doing a detailed manual
      inspection and review of the 12,461 patched files from the initial patch
      version early this week with:
       - a full scancode scan run, collecting the matched texts, detected
         license ids and scores
       - reviewing anything where there was a license detected (about 500+
         files) to ensure that the applied SPDX license was correct
       - reviewing anything where there was no detection but the patch license
         was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
         SPDX license was correct
      
      This produced a worksheet with 20 files needing minor correction.  This
      worksheet was then exported into 3 different .csv files for the
      different types of files to be modified.
      
      These .csv files were then reviewed by Greg.  Thomas wrote a script to
      parse the csv files and add the proper SPDX tag to the file, in the
      format that the file expected.  This script was further refined by Greg
      based on the output to detect more types of files automatically and to
      distinguish between header and source .c files (which need different
      comment types.)  Finally Greg ran the script using the .csv files to
      generate the patches.
      Reviewed-by: 's avatarKate Stewart <kstewart@linuxfoundation.org>
      Reviewed-by: 's avatarPhilippe Ombredanne <pombredanne@nexb.com>
      Reviewed-by: 's avatarThomas Gleixner <tglx@linutronix.de>
      Signed-off-by: 's avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      b2441318
  12. 06 Jul, 2017 3 commits
  13. 03 May, 2017 1 commit
    • Greg Thelen's avatar
      slab: avoid IPIs when creating kmem caches · a87c75fb
      Greg Thelen authored
      Each slab kmem cache has per cpu array caches.  The array caches are
      created when the kmem_cache is created, either via kmem_cache_create()
      or lazily when the first object is allocated in context of a kmem
      enabled memcg.  Array caches are replaced by writing to /proc/slabinfo.
      
      Array caches are protected by holding slab_mutex or disabling
      interrupts.  Array cache allocation and replacement is done by
      __do_tune_cpucache() which holds slab_mutex and calls
      kick_all_cpus_sync() to interrupt all remote processors which confirms
      there are no references to the old array caches.
      
      IPIs are needed when replacing array caches.  But when creating a new
      array cache, there's no need to send IPIs because there cannot be any
      references to the new cache.  Outside of memcg kmem accounting these
      IPIs occur at boot time, so they're not a problem.  But with memcg kmem
      accounting each container can create kmem caches, so the IPIs are
      wasteful.
      
      Avoid unnecessary IPIs when creating array caches.
      
      Test which reports the IPI count of allocating slab in 10000 memcg:
      
      	import os
      
      	def ipi_count():
      		with open("/proc/interrupts") as f:
      			for l in f:
      				if 'Function call interrupts' in l:
      					return int(l.split()[1])
      
      	def echo(val, path):
      		with open(path, "w") as f:
      			f.write(val)
      
      	n = 10000
      	os.chdir("/mnt/cgroup/memory")
      	pid = str(os.getpid())
      	a = ipi_count()
      	for i in range(n):
      		os.mkdir(str(i))
      		echo("1G\n", "%d/memory.limit_in_bytes" % i)
      		echo("1G\n", "%d/memory.kmem.limit_in_bytes" % i)
      		echo(pid, "%d/cgroup.procs" % i)
      		open("/tmp/x", "w").close()
      		os.unlink("/tmp/x")
      	b = ipi_count()
      	print "%d loops: %d => %d (+%d ipis)" % (n, a, b, b-a)
      	echo(pid, "cgroup.procs")
      	for i in range(n):
      		os.rmdir(str(i))
      
      patched:   10000 loops: 1069 => 1170 (+101 ipis)
      unpatched: 10000 loops: 1192 => 48933 (+47741 ipis)
      
      Link: http://lkml.kernel.org/r/20170416214544.109476-1-gthelen@google.comSigned-off-by: 's avatarGreg Thelen <gthelen@google.com>
      Acked-by: 's avatarJoonsoo Kim <iamjoonsoo.kim@lge.com>
      Acked-by: 's avatarDavid Rientjes <rientjes@google.com>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
      a87c75fb
  14. 18 Apr, 2017 1 commit
    • Paul E. McKenney's avatar
      mm: Rename SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU · 5f0d5a3a
      Paul E. McKenney authored
      A group of Linux kernel hackers reported chasing a bug that resulted
      from their assumption that SLAB_DESTROY_BY_RCU provided an existence
      guarantee, that is, that no block from such a slab would be reallocated
      during an RCU read-side critical section.  Of course, that is not the
      case.  Instead, SLAB_DESTROY_BY_RCU only prevents freeing of an entire
      slab of blocks.
      
      However, there is a phrase for this, namely "type safety".  This commit
      therefore renames SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU in order
      to avoid future instances of this sort of confusion.
      Signed-off-by: 's avatarPaul E. McKenney <paulmck@linux.vnet.ibm.com>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: <linux-mm@kvack.org>
      Acked-by: 's avatarJohannes Weiner <hannes@cmpxchg.org>
      Acked-by: 's avatarVlastimil Babka <vbabka@suse.cz>
      [ paulmck: Add comments mentioning the old name, as requested by Eric
        Dumazet, in order to help people familiar with the old name find
        the new one. ]
      Acked-by: 's avatarDavid Rientjes <rientjes@google.com>
      5f0d5a3a
  15. 02 Mar, 2017 1 commit
  16. 23 Feb, 2017 3 commits
    • Tejun Heo's avatar
      slab: introduce __kmemcg_cache_deactivate() · c9fc5864
      Tejun Heo authored
      __kmem_cache_shrink() is called with %true @deactivate only for memcg
      caches.  Remove @deactivate from __kmem_cache_shrink() and introduce
      __kmemcg_cache_deactivate() instead.  Each memcg-supporting allocator
      should implement it and it should deactivate and drain the cache.
      
      This is to allow memcg cache deactivation behavior to further deviate
      from simple shrinking without messing up __kmem_cache_shrink().
      
      This is pure reorganization and doesn't introduce any observable
      behavior changes.
      
      v2: Dropped unnecessary ifdef in mm/slab.h as suggested by Vladimir.
      
      Link: http://lkml.kernel.org/r/20170117235411.9408-8-tj@kernel.orgSigned-off-by: 's avatarTejun Heo <tj@kernel.org>
      Acked-by: 's avatarVladimir Davydov <vdavydov.dev@gmail.com>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
      c9fc5864
    • Tejun Heo's avatar
      Revert "slub: move synchronize_sched out of slab_mutex on shrink" · 290b6a58
      Tejun Heo authored
      Patch series "slab: make memcg slab destruction scalable", v3.
      
      With kmem cgroup support enabled, kmem_caches can be created and
      destroyed frequently and a great number of near empty kmem_caches can
      accumulate if there are a lot of transient cgroups and the system is not
      under memory pressure.  When memory reclaim starts under such
      conditions, it can lead to consecutive deactivation and destruction of
      many kmem_caches, easily hundreds of thousands on moderately large
      systems, exposing scalability issues in the current slab management
      code.
      
      I've seen machines which end up with hundred thousands of caches and
      many millions of kernfs_nodes.  The current code is O(N^2) on the total
      number of caches and has synchronous rcu_barrier() and
      synchronize_sched() in cgroup offline / release path which is executed
      while holding cgroup_mutex.  Combined, this leads to very expensive and
      slow cache destruction operations which can easily keep running for half
      a day.
      
      This also messes up /proc/slabinfo along with other cache iterating
      operations.  seq_file operates on 4k chunks and on each 4k boundary
      tries to seek to the last position in the list.  With a huge number of
      caches on the list, this becomes very slow and very prone to the list
      content changing underneath it leading to a lot of missing and/or
      duplicate entries.
      
      This patchset addresses the scalability problem.
      
      * Add root and per-memcg lists.  Update each user to use the
        appropriate list.
      
      * Make rcu_barrier() for SLAB_DESTROY_BY_RCU caches globally batched
        and asynchronous.
      
      * For dying empty slub caches, remove the sysfs files after
        deactivation so that we don't end up with millions of sysfs files
        without any useful information on them.
      
      This patchset contains the following nine patches.
      
       0001-Revert-slub-move-synchronize_sched-out-of-slab_mutex.patch
       0002-slub-separate-out-sysfs_slab_release-from-sysfs_slab.patch
       0003-slab-remove-synchronous-rcu_barrier-call-in-memcg-ca.patch
       0004-slab-reorganize-memcg_cache_params.patch
       0005-slab-link-memcg-kmem_caches-on-their-associated-memo.patch
       0006-slab-implement-slab_root_caches-list.patch
       0007-slab-introduce-__kmemcg_cache_deactivate.patch
       0008-slab-remove-synchronous-synchronize_sched-from-memcg.patch
       0009-slab-remove-slub-sysfs-interface-files-early-for-emp.patch
       0010-slab-use-memcg_kmem_cache_wq-for-slab-destruction-op.patch
      
      0001 reverts an existing optimization to prepare for the following
      changes.  0002 is a prep patch.  0003 makes rcu_barrier() in release
      path batched and asynchronous.  0004-0006 separate out the lists.
      0007-0008 replace synchronize_sched() in slub destruction path with
      call_rcu_sched().  0009 removes sysfs files early for empty dying
      caches.  0010 makes destruction work items use a workqueue with limited
      concurrency.
      
      This patch (of 10):
      
      Revert 89e364db ("slub: move synchronize_sched out of slab_mutex on
      shrink").
      
      With kmem cgroup support enabled, kmem_caches can be created and destroyed
      frequently and a great number of near empty kmem_caches can accumulate if
      there are a lot of transient cgroups and the system is not under memory
      pressure.  When memory reclaim starts under such conditions, it can lead
      to consecutive deactivation and destruction of many kmem_caches, easily
      hundreds of thousands on moderately large systems, exposing scalability
      issues in the current slab management code.  This is one of the patches to
      address the issue.
      
      Moving synchronize_sched() out of slab_mutex isn't enough as it's still
      inside cgroup_mutex.  The whole deactivation / release path will be
      updated to avoid all synchronous RCU operations.  Revert this insufficient
      optimization in preparation to ease future changes.
      
      Link: http://lkml.kernel.org/r/20170117235411.9408-2-tj@kernel.orgSigned-off-by: 's avatarTejun Heo <tj@kernel.org>
      Reported-by: 's avatarJay Vana <jsvana@fb.com>
      Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
      290b6a58
    • Vlastimil Babka's avatar
      mm, slab: rename kmalloc-node cache to kmalloc-<size> · af3b5f87
      Vlastimil Babka authored
      SLAB as part of its bootstrap pre-creates one kmalloc cache that can fit
      the kmem_cache_node management structure, and puts it into the generic
      kmalloc cache array (e.g. for 128b objects).  The name of this cache is
      "kmalloc-node", which is confusing for readers of /proc/slabinfo as the
      cache is used for generic allocations (and not just the kmem_cache_node
      struct) and it appears as the kmalloc-128 cache is missing.
      
      An easy solution is to use the kmalloc-<size> name when pre-creating the
      cache, which we can get from the kmalloc_info array.
      
      Example /proc/slabinfo before the patch:
      
        ...
        kmalloc-256         1647   1984    256   16    1 : tunables  120   60    8 : slabdata    124    124    828
        kmalloc-192         1974   1974    192   21    1 : tunables  120   60    8 : slabdata     94     94    133
        kmalloc-96          1332   1344    128   32    1 : tunables  120   60    8 : slabdata     42     42    219
        kmalloc-64          2505   5952     64   64    1 : tunables  120   60    8 : slabdata     93     93    715
        kmalloc-32          4278   4464     32  124    1 : tunables  120   60    8 : slabdata     36     36    346
        kmalloc-node        1352   1376    128   32    1 : tunables  120   60    8 : slabdata     43     43     53
        kmem_cache           132    147    192   21    1 : tunables  120   60    8 : slabdata      7      7      0
      
      After the patch:
      
        ...
        kmalloc-256         1672   2160    256   16    1 : tunables  120   60    8 : slabdata    135    135    807
        kmalloc-192         1992   2016    192   21    1 : tunables  120   60    8 : slabdata     96     96    203
        kmalloc-96          1159   1184    128   32    1 : tunables  120   60    8 : slabdata     37     37    116
        kmalloc-64          2561   4864     64   64    1 : tunables  120   60    8 : slabdata     76     76    785
        kmalloc-32          4253   4340     32  124    1 : tunables  120   60    8 : slabdata     35     35    270
        kmalloc-128         1256   1280    128   32    1 : tunables  120   60    8 : slabdata     40     40     39
        kmem_cache           125    147    192   21    1 : tunables  120   60    8 : slabdata      7      7      0
      
      [vbabka@suse.cz: export the whole kmalloc_info structure instead of just a name accessor, per Christoph Lameter]
        Link: http://lkml.kernel.org/r/54e80303-b814-4232-66d4-95b34d3eb9d0@suse.cz
      Link: http://lkml.kernel.org/r/20170203181008.24898-1-vbabka@suse.czSigned-off-by: 's avatarVlastimil Babka <vbabka@suse.cz>
      Reviewed-by: 's avatarMatthew Wilcox <mawilcox@microsoft.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: Christoph Lameter <cl@linux.com>
      Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
      af3b5f87
  17. 11 Jan, 2017 1 commit
  18. 13 Dec, 2016 3 commits
  19. 28 Oct, 2016 2 commits
  20. 17 Sep, 2016 1 commit
    • Tejun Heo's avatar
      slab, workqueue: remove keventd_up() usage · eac0337a
      Tejun Heo authored
      Now that workqueue can handle work item queueing from very early
      during boot, there is no need to gate schedule_delayed_work_on() while
      !keventd_up().  Remove it.
      Signed-off-by: 's avatarTejun Heo <tj@kernel.org>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: linux-mm@kvack.org
      eac0337a