Commit ca1a46d6 authored by Linus Torvalds

Merge tag 'slab-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - Separate struct slab from struct page - an offshoot of the page folio
   work.

   Struct page fields used by slab allocators are moved from struct page
   to a new struct slab, that uses the same physical storage. Similar to
   struct folio, it always is a head page. This brings better type
   safety, separation of large kmalloc allocations from true slabs, and
   cleanup of related objcg code.

 - A SLAB_MERGE_DEFAULT config optimization.

* tag 'slab-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (33 commits)
  mm/slob: Remove unnecessary page_mapcount_reset() function call
  bootmem: Use page->index instead of page->freelist
  zsmalloc: Stop using slab fields in struct page
  mm/slub: Define struct slab fields for CONFIG_SLUB_CPU_PARTIAL only when enabled
  mm/slub: Simplify struct slab slabs field definition
  mm/sl*b: Differentiate struct slab fields by sl*b implementations
  mm/kfence: Convert kfence_guarded_alloc() to struct slab
  mm/kasan: Convert to struct folio and struct slab
  mm/slob: Convert SLOB to use struct slab and struct folio
  mm/memcg: Convert slab objcgs from struct page to struct slab
  mm: Convert struct page to struct slab in functions used by other subsystems
  mm/slab: Finish struct page to struct slab conversion
  mm/slab: Convert most struct page to struct slab by spatch
  mm/slab: Convert kmem_getpages() and kmem_freepages() to struct slab
  mm/slub: Finish struct page to struct slab conversion
  mm/slub: Convert most struct page to struct slab by spatch
  mm/slub: Convert pfmemalloc_match() to take a struct slab
  mm/slub: Convert __free_slab() to use struct slab
  mm/slub: Convert alloc_slab_page() to return a struct slab
  mm/slub: Convert print_page_info() to print_slab_info()
  ...
parents d93aebbd 9d6c59c1
......@@ -981,7 +981,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
magic = (unsigned long)page->freelist;
magic = page->index;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
......
......@@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page);
*/
static inline void free_bootmem_page(struct page *page)
{
unsigned long magic = (unsigned long)page->freelist;
unsigned long magic = page->index;
/*
* The reserve_bootmem_region sets the reserved flag on bootmem
......
......@@ -9,6 +9,7 @@
struct kmem_cache;
struct page;
struct slab;
struct vm_struct;
struct task_struct;
......@@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
return 0;
}
void __kasan_poison_slab(struct page *page);
static __always_inline void kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab);
static __always_inline void kasan_poison_slab(struct slab *slab)
{
if (kasan_enabled())
__kasan_poison_slab(page);
__kasan_poison_slab(slab);
}
void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
......@@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
slab_flags_t *flags) {}
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_poison_slab(struct slab *slab) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
static inline void kasan_poison_object_data(struct kmem_cache *cache,
......
......@@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
 * page_objcgs - fetch the object cgroups vector attached to a page
 * @page: the page struct to inspect
 *
 * Reads page->memcg_data once and returns it with the flag bits masked
 * off, i.e. the pointer to the object cgroups vector, or NULL. The caller
 * must already know that the page carries such a vector; this must not be
 * used on pages that might instead be associated with a memory cgroup
 * directly (e.g. kernel stack pages).
 */
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
	unsigned long data = READ_ONCE(page->memcg_data);
	unsigned long vec = data & ~MEMCG_DATA_FLAGS_MASK;

	/* Non-zero memcg_data must be tagged as an objcg vector ... */
	VM_BUG_ON_PAGE(data && !(data & MEMCG_DATA_OBJCGS), page);
	/* ... and must never carry the KMEM flag. */
	VM_BUG_ON_PAGE(data & MEMCG_DATA_KMEM, page);

	return (struct obj_cgroup **)vec;
}
/*
 * page_objcgs_check - fetch the object cgroups vector attached to a page
 * @page: the page struct to inspect
 *
 * Returns the pointer to the object cgroups vector stored in
 * page->memcg_data, or NULL when memcg_data does not have
 * MEMCG_DATA_OBJCGS set. Unlike page_objcgs(), this is safe to call on
 * pages that may be directly associated with a memory cgroup.
 */
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
	unsigned long data = READ_ONCE(page->memcg_data);

	/* A zero value has no flag bits either, so one test covers both cases. */
	if (!(data & MEMCG_DATA_OBJCGS))
		return NULL;

	VM_BUG_ON_PAGE(data & MEMCG_DATA_KMEM, page);

	data &= ~MEMCG_DATA_FLAGS_MASK;
	return (struct obj_cgroup **)data;
}
#else
static inline bool folio_memcg_kmem(struct folio *folio)
......@@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return false;
}
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
return NULL;
}
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
#endif
static inline bool PageMemcgKmem(struct page *page)
......
......@@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x)
return compound_head(page);
}
/*
 * virt_to_folio - map a kernel virtual address to its folio
 * @x: the virtual address
 *
 * Looks up the page for @x with virt_to_page() and returns the result of
 * page_folio() on that page.
 */
static inline struct folio *virt_to_folio(const void *x)
{
	return page_folio(virt_to_page(x));
}
void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
......@@ -1753,6 +1760,11 @@ void page_address_init(void);
#define page_address_init() do { } while(0)
#endif
/*
 * folio_address - address of a folio, as reported by page_address()
 * @folio: the folio to look up
 *
 * Thin wrapper: forwards the page embedded in @folio to page_address().
 */
static inline void *folio_address(const struct folio *folio)
{
	const struct page *head = &folio->page;

	return page_address(head);
}
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
......
......@@ -56,11 +56,11 @@ struct mem_cgroup;
* in each subpage, but you may need to restore some of their values
* afterwards.
*
* SLUB uses cmpxchg_double() to atomically update its freelist and
* counters. That requires that freelist & counters be adjacent and
* double-word aligned. We align all struct pages to double-word
* boundaries, and ensure that 'freelist' is aligned within the
* struct.
* SLUB uses cmpxchg_double() to atomically update its freelist and counters.
* That requires that freelist & counters in struct slab be adjacent and
* double-word aligned. Because struct slab currently just reinterprets the
* bits of struct page, we align all struct pages to double-word boundaries,
* and ensure that 'freelist' is aligned within struct slab.
*/
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
......
......@@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object);
void kmem_dump_obj(void *object);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user);
#else
static inline void __check_heap_object(const void *ptr, unsigned long n,
struct page *page, bool to_user) { }
#endif
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
......
......@@ -87,11 +87,11 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - page->s_mem) % cache->size;
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
......@@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
u32 offset = (obj - page->s_mem);
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(page_address(page)))
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
......
......@@ -48,9 +48,9 @@ enum stat_item {
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
......@@ -99,8 +99,8 @@ struct kmem_cache {
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial pages to keep around */
unsigned int cpu_partial_pages;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
......@@ -156,16 +156,13 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
}
#endif
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
......@@ -181,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, page_address(page), obj);
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return page->objects;
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
......@@ -1933,6 +1933,7 @@ endchoice
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
depends on SLAB || SLUB
help
For reduced kernel memory fragmentation, slab caches can be
merged when they share the same size and other characteristics.
......
......@@ -15,7 +15,7 @@
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
page->freelist = (void *)type;
page->index = type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
......@@ -23,14 +23,13 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
void put_page_bootmem(struct page *page)
{
unsigned long type;
unsigned long type = page->index;
type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
page->freelist = NULL;
page->index = 0;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
......
......@@ -247,8 +247,9 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
}
#endif
void __kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab)
{
struct page *page = slab_page(slab);
unsigned long i;
for (i = 0; i < compound_nr(page); i++)
......@@ -298,7 +299,7 @@ static inline u8 assign_tag(struct kmem_cache *cache,
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object);
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
......@@ -341,7 +342,7 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
if (is_kfence_address(object))
return false;
if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
if (unlikely(nearest_obj(cache, virt_to_slab(object), object) !=
object)) {
kasan_report_invalid_free(tagged_object, ip);
return true;
......@@ -401,9 +402,9 @@ void __kasan_kfree_large(void *ptr, unsigned long ip)
void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
{
struct page *page;
struct folio *folio;
page = virt_to_head_page(ptr);
folio = virt_to_folio(ptr);
/*
* Even though this function is only called for kmem_cache_alloc and
......@@ -411,12 +412,14 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
* !PageSlab() when the size provided to kmalloc is larger than
* KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
*/
if (unlikely(!PageSlab(page))) {
if (unlikely(!folio_test_slab(folio))) {
if (____kasan_kfree_large(ptr, ip))
return;
kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE, false);
kasan_poison(ptr, folio_size(folio), KASAN_FREE_PAGE, false);
} else {
____kasan_slab_free(page->slab_cache, ptr, ip, false, false);
struct slab *slab = folio_slab(folio);
____kasan_slab_free(slab->slab_cache, ptr, ip, false, false);
}
}
......@@ -560,7 +563,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
struct page *page;
struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return (void *)object;
......@@ -572,13 +575,13 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
*/
kasan_unpoison(object, size, false);
page = virt_to_head_page(object);
slab = virt_to_slab(object);
/* Piggy-back on kmalloc() instrumentation to poison the redzone. */
if (unlikely(!PageSlab(page)))
if (unlikely(!slab))
return __kasan_kmalloc_large(object, size, flags);
else
return ____kasan_kmalloc(page->slab_cache, object, size, flags);
return ____kasan_kmalloc(slab->slab_cache, object, size, flags);
}
bool __kasan_check_byte(const void *address, unsigned long ip)
......
......@@ -330,16 +330,16 @@ DEFINE_ASAN_SET_SHADOW(f8);
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
{
struct page *page = kasan_addr_to_page(addr);
struct slab *slab = kasan_addr_to_slab(addr);
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
if (is_kfence_address(addr) || !(page && PageSlab(page)))
if (is_kfence_address(addr) || !slab)
return;
cache = page->slab_cache;
object = nearest_obj(cache, page, addr);
cache = slab->slab_cache;
object = nearest_obj(cache, slab, addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (!alloc_meta)
return;
......
......@@ -265,6 +265,7 @@ bool kasan_report(unsigned long addr, size_t size,
void kasan_report_invalid_free(void *object, unsigned long ip);
struct page *kasan_addr_to_page(const void *addr);
struct slab *kasan_addr_to_slab(const void *addr);
depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc);
void kasan_set_track(struct kasan_track *track, gfp_t flags);
......
......@@ -117,7 +117,7 @@ static unsigned long quarantine_batch_size;
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
return virt_to_head_page(qlink)->slab_cache;
return virt_to_slab(qlink)->slab_cache;
}
static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
......
......@@ -150,6 +150,14 @@ struct page *kasan_addr_to_page(const void *addr)
return NULL;
}
/*
 * kasan_addr_to_slab - look up the slab for an address
 * @addr: the address to translate
 *
 * Addresses outside [PAGE_OFFSET, high_memory) yield NULL. For addresses
 * inside that range the result is whatever virt_to_slab() returns.
 */
struct slab *kasan_addr_to_slab(const void *addr)
{
	if (addr < (void *)PAGE_OFFSET || addr >= high_memory)
		return NULL;

	return virt_to_slab(addr);
}
static void describe_object_addr(struct kmem_cache *cache, void *object,
const void *addr)
{
......@@ -248,8 +256,9 @@ static void print_address_description(void *addr, u8 tag)
pr_err("\n");
if (page && PageSlab(page)) {
struct kmem_cache *cache = page->slab_cache;
void *object = nearest_obj(cache, page, addr);
struct slab *slab = page_slab(page);
struct kmem_cache *cache = slab->slab_cache;
void *object = nearest_obj(cache, slab, addr);
describe_object(cache, object, addr, tag);
}
......
......@@ -12,7 +12,7 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
#ifdef CONFIG_KASAN_TAGS_IDENTIFY
struct kasan_alloc_meta *alloc_meta;
struct kmem_cache *cache;
struct page *page;
struct slab *slab;
const void *addr;
void *object;
u8 tag;
......@@ -20,10 +20,10 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
tag = get_tag(info->access_addr);
addr = kasan_reset_tag(info->access_addr);
page = kasan_addr_to_page(addr);
if (page && PageSlab(page)) {
cache = page->slab_cache;
object = nearest_obj(cache, page, (void *)addr);
slab = kasan_addr_to_slab(addr);
if (slab) {
cache = slab->slab_cache;
object = nearest_obj(cache, slab, (void *)addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta) {
......
......@@ -360,7 +360,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
{
struct kfence_metadata *meta = NULL;
unsigned long flags;
struct page *page;
struct slab *slab;
void *addr;
/* Try to obtain a free object. */
......@@ -424,13 +424,14 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
alloc_covered_add(alloc_stack_hash, 1);
/* Set required struct page fields. */
page = virt_to_page(meta->addr);
page->slab_cache = cache;
if (IS_ENABLED(CONFIG_SLUB))
page->objects = 1;
if (IS_ENABLED(CONFIG_SLAB))
page->s_mem = addr;
/* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */
for_each_canary(meta, set_canary_byte);
......
......@@ -282,7 +282,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
alloc = kmalloc(size, gfp);
if (is_kfence_address(alloc)) {
struct page *page = virt_to_head_page(alloc);
struct slab *slab = virt_to_slab(alloc);
struct kmem_cache *s = test_cache ?:
kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
......@@ -291,8 +291,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
* even for KFENCE objects; these are required so that
* memcg accounting works correctly.
*/
KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
if (policy == ALLOCATE_ANY)
return alloc;
......
......@@ -2816,31 +2816,31 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
rcu_read_unlock();
}
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page)
int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab)
{
unsigned int objects = objs_per_slab_page(s, page);
unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;
gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
page_to_nid(page));
slab_nid(slab));
if (!vec)
return -ENOMEM;
memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
if (new_page) {
if (new_slab) {
/*
* If the slab page is brand new and nobody can yet access
* it's memcg_data, no synchronization is required and
* memcg_data can be simply assigned.
* If the slab is brand new and nobody can yet access its
* memcg_data, no synchronization is required and memcg_data can
* be simply assigned.
*/
page->memcg_data = memcg_data;
} else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
slab->memcg_data = memcg_data;
} else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
* If the slab page is already in use, somebody can allocate
* and assign obj_cgroups in parallel. In this case the existing
* If the slab is already in use, somebody can allocate and
* assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
......@@ -2865,38 +2865,43 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
*/
struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
struct page *page;
struct folio *folio;
if (mem_cgroup_disabled())
return NULL;
page = virt_to_head_page(p);
folio = virt_to_folio(p);
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* the page->obj_cgroups.
* slab->memcg_data.
*/
if (page_objcgs_check(page)) {
struct obj_cgroup *objcg;
if (folio_test_slab(folio)) {
struct obj_cgroup **objcgs;
struct slab *slab;
unsigned int off;
off = obj_to_index(page->slab_cache, page, p);
objcg = page_objcgs(page)[off];
if (objcg)
return obj_cgroup_memcg(objcg);
slab = folio_slab(folio);
objcgs = slab_objcgs(slab);
if (!objcgs)
return NULL;
off = obj_to_index(slab->slab_cache, slab, p);
if (objcgs[off])
return obj_cgroup_memcg(objcgs[off]);
return NULL;
}
/*
* page_memcg_check() is used here, because page_has_obj_cgroups()
* check above could fail because the object cgroups vector wasn't set
* at that moment, but it can be set concurrently.
* page_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
* slab->memcg_data has not been freed yet
* page_memcg_check(page) will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
return page_memcg_check(page);
return page_memcg_check(folio_page(folio, 0));
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
......
This diff is collapsed.
This diff is collapsed.
......@@ -550,13 +550,13 @@ bool slab_is_available(void)
*/
bool kmem_valid_obj(void *object)
{
struct page *page;
struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
page = virt_to_head_page(object);
return PageSlab(page);
folio = virt_to_folio(object);
return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
......@@ -579,18 +579,18 @@ void kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
struct page *page;
struct slab *slab;
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
page = virt_to_head_page(object);
if (WARN_ON_ONCE(!PageSlab(page))) {
slab = virt_to_slab(object);
if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
kmem_obj_info(&kp, object, page);
kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
else
......
......@@ -30,7 +30,7 @@
* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
* alloc_pages() directly, allocating compound pages so the page order
* does not have to be separately tracked.
* These objects are detected in kfree() because PageSlab()
* These objects are detected in kfree() because folio_test_slab()
* is false for them.
*
* SLAB is emulated on top of SLOB by simply calling constructors and
......@@ -105,21 +105,21 @@ static LIST_HEAD(free_slob_large);
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
static inline int slob_page_free(struct page *sp)
static inline int slob_page_free(struct slab *slab)
{
return PageSlobFree(sp);
return PageSlobFree(slab_page(slab));
}
static void set_slob_page_free(struct page *sp, struct list_head *list)
static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
list_add(&sp->slab_list, list);
__SetPageSlobFree(sp);
list_add(&slab->slab_list, list);
__SetPageSlobFree(slab_page(slab));
}
static inline void clear_slob_page_free(struct page *sp)
static inline void clear_slob_page_free(struct slab *slab)
{
list_del(&sp->slab_list);
__ClearPageSlobFree(sp);
list_del(&slab->slab_list);
__ClearPageSlobFree(slab_page(slab));
}
#define SLOB_UNIT sizeof(slob_t)
......@@ -234,7 +234,7 @@ static void slob_free_pages(void *b, int order)
* freelist, in this case @page_removed_from_list will be set to
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_page_alloc(struct slab *sp, size_t size, int align,
int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
......@@ -301,7 +301,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
int align_offset)
{
struct page *sp;
struct folio *folio;
struct slab *sp;
struct list_head *slob_list;
slob_t *b = NULL;
unsigned long flags;
......@@ -323,7 +324,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
if (node != NUMA_NO_NODE && slab_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
......@@ -358,8 +359,9 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
sp = virt_to_page(b);
__SetPageSlab(sp);
folio = virt_to_folio(b);
__folio_set_slab(folio);
sp = folio_slab(folio);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
......@@ -381,7 +383,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
*/
static void slob_free(void *block, int size)
{
struct page *sp;
struct slab *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
......@@ -391,7 +393,7 @@ static void slob_free(void *block, int size)
return;
BUG_ON(!size);
sp = virt_to_page(block);
sp = virt_to_slab(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
......@@ -401,8 +403,7 @@ static void slob_free(void *block, int size)
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
__ClearPageSlab(sp);
page_mapcount_reset(sp);
__folio_clear_slab(slab_folio(sp));
slob_free_pages(b, 0);
return;
}
......@@ -462,10 +463,10 @@ static void slob_free(void *block, int size)
}
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
kpp->kp_ptr = object;
kpp->kp_page = page;
kpp->kp_slab = slab;
}
#endif
......@@ -544,7 +545,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
void kfree(const void *block)
{
struct page *sp;
struct folio *sp;
trace_kfree(_RET_IP_, block);
......@@ -552,16 +553,17 @@ void kfree(const void *block)
return;
kmemleak_free(block);
sp = virt_to_page(block);
if (PageSlab(sp)) {
sp = virt_to_folio(block);
if (folio_test_slab(sp)) {
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else {
unsigned int order = compound_order(sp);
mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
unsigned int order = folio_order(sp);
mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(sp, order);
__free_pages(folio_page(sp, 0), order);
}
}
......@@ -570,7 +572,7 @@ EXPORT_SYMBOL(kfree);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
struct page *sp;
struct folio *folio;
int align;
unsigned int *m;
......@@ -578,9 +580,9 @@ size_t __ksize(const void *block)
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
sp = virt_to_page(block);
if (unlikely(!PageSlab(sp)))
return page_size(sp);
folio = virt_to_folio(block);
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
m = (unsigned int *)(block - align);
......
This diff is collapsed.
......@@ -722,7 +722,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
magic = (unsigned long) page->freelist;
magic = page->index;
BUG_ON(magic == NODE_INFO);
......
......@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>
#include "slab.h"
/*
* Checks if a given pointer and length is contained by the current
......@@ -223,7 +224,7 @@ static inline void check_page_span(const void *ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
struct page *page;
struct folio *folio;
if (!virt_addr_valid(ptr))
return;
......@@ -231,16 +232,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/*
* When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
* highmem page or fallback to virt_to_page(). The following
* is effectively a highmem-aware virt_to_head_page().
* is effectively a highmem-aware virt_to_slab().
*/
page = compound_head(kmap_to_page((void *)ptr));
folio = page_folio(kmap_to_page((void *)ptr));
if (PageSlab(page)) {
if (folio_test_slab(folio)) {
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, page, to_user);
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else {
/* Verify object does not incorrectly span multiple pages. */
check_page_span(ptr, n, page, to_user);
check_page_span(ptr, n, folio_page(folio, 0), to_user);
}
}
......
......@@ -17,10 +17,10 @@
*
* Usage of struct page fields:
* page->private: points to zspage
* page->freelist(index): links together all component pages of a zspage
* page->index: links together all component pages of a zspage
* For the huge page, this is always 0, so we use this field
* to store handle.
* page->units: first object offset in a subpage of zspage
* page->page_type: first object offset in a subpage of zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
......@@ -489,12 +489,12 @@ static inline struct page *get_first_page(struct zspage *zspage)
static inline int get_first_obj_offset(struct page *page)
{
return page->units;
return page->page_type;
}
static inline void set_first_obj_offset(struct page *page, int offset)
{
page->units = offset;
page->page_type = offset;
}
static inline unsigned int get_freeobj(struct zspage *zspage)
......@@ -827,7 +827,7 @@ static struct page *get_next_page(struct page *page)
if (unlikely(PageHugeObject(page)))
return NULL;
return page->freelist;
return (struct page *)page->index;
}
/**
......@@ -901,7 +901,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page_mapcount_reset(page);
ClearPageHugeObject(page);
page->freelist = NULL;
page->index = 0;
}
static int trylock_zspage(struct zspage *zspage)
......@@ -1027,7 +1027,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
/*
* Allocate individual pages and link them together as:
* 1. all pages are linked together using page->freelist
* 1. all pages are linked together using page->index
* 2. each sub-page point to zspage using page->private
*
* we set PG_private to identify the first page (i.e. no other sub-page
......@@ -1036,7 +1036,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
for (i = 0; i < nr_pages; i++) {
page = pages[i];
set_page_private(page, (unsigned long)zspage);
page->freelist = NULL;
page->index = 0;
if (i == 0) {
zspage->first_page = page;
SetPagePrivate(page);
......@@ -1044,7 +1044,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
class->pages_per_zspage == 1))
SetPageHugeObject(page);
} else {
prev_page->freelist = page;
prev_page->index = (unsigned long)page;
}
prev_page = page;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment