Commit c0ebfdc3 authored by Dennis Zhou (Facebook), committed by Tejun Heo

percpu: modify base_addr to be region specific

Originally, the first chunk was served by one or two chunks, each
given a region they are responsible for. Despite this, the arithmetic
was based on the true base_addr of the chunk, making it overly
inclusive.

This patch moves the base_addr of the chunks that are responsible for
the first chunk. The base_addr must remain page aligned to keep the
address alignment correct, so it is the beginning of the served region
aligned down to a page boundary. start_offset holds where the served
region begins relative to this new base_addr.
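
For illustration, here is a minimal standalone sketch of that realignment
arithmetic (the base address and static size below are hypothetical;
PAGE_SIZE/PAGE_MASK mirror their kernel definitions):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	/* Hypothetical: the first chunk is mapped at 0xffff0000 and the
	 * static region is 7000 bytes, so the next region starts mid-page. */
	unsigned long base = 0xffff0000UL;
	unsigned long static_size = 7000;

	unsigned long tmp_addr = base + static_size;       /* region start */
	unsigned long aligned_addr = tmp_addr & PAGE_MASK; /* page align down */
	unsigned long start_offset = tmp_addr - aligned_addr;

	/* prints: base_addr 0xffff1000, start_offset 2904 */
	printf("base_addr %#lx, start_offset %lu\n", aligned_addr, start_offset);
	return 0;
}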

The corresponding percpu address checks are modified to be more specific
as a result. The first chunk check considers only the dynamic region, and
both the first chunk and reserved chunk checks ignore the static region.
Static region addresses should never be passed into the allocator. There
is no functional impact here beyond distinguishing the first chunk and
making the checks specific.
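
For intuition, a userspace sketch of the resulting bounds check follows;
the struct and names are hypothetical stand-ins carrying only the fields
the new checks use, not the real struct pcpu_chunk:

#include <stdbool.h>
#include <stddef.h>

#define PAGE_SIZE 4096

/* Hypothetical stand-in for the handful of pcpu_chunk fields used here. */
struct region {
	char *base_addr;   /* page-aligned base of the served region */
	int start_offset;  /* padding before the served region */
	int end_offset;    /* padding after the served region */
	int nr_pages;      /* pages spanned by the chunk */
};

/* Half-open bounds check over the served region only, in the spirit of
 * pcpu_addr_in_first_chunk()/pcpu_addr_in_reserved_chunk() in the diff
 * below. */
static bool addr_in_region(const struct region *r, const void *addr)
{
	const char *start = r->base_addr + r->start_offset;
	const char *end = r->base_addr +
			  (size_t)r->nr_pages * PAGE_SIZE - r->end_offset;

	return (const char *)addr >= start && (const char *)addr < end;
}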

The percpu pointer to physical address translation is left intact, as
addresses are not given out in the non-allocated portion of percpu memory.

nr_pages is added to struct pcpu_chunk to keep track of the size of the
entire region served, containing both start_offset and end_offset. This
variable will be used to manage the bitmap allocator.
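
The end_offset side of that region arithmetic can be sketched the same way
(values hypothetical; PFN_ALIGN rounds up to a page boundary as in the
kernel):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	/* Hypothetical region: 2904 bytes of start padding followed by a
	 * 20000-byte served map. */
	unsigned long start_offset = 2904, map_size = 20000;

	unsigned long region_size = PFN_ALIGN(start_offset + map_size);
	unsigned long end_offset = region_size - start_offset - map_size;

	/* prints: region 24576 bytes (6 pages), end_offset 1672 */
	printf("region %lu bytes (%lu pages), end_offset %lu\n",
	       region_size, region_size / PAGE_SIZE, end_offset);
	return 0;
}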
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 0c4169c3
@@ -29,6 +29,8 @@ struct pcpu_chunk {
 	int			end_offset;	/* additional area required to
 						   have the region end page
 						   aligned */
+
+	int			nr_pages;	/* # of pages served by this chunk */
 	int			nr_populated;	/* # of populated pages */
 	unsigned long		populated[];	/* populated bitmap */
 };
@@ -181,19 +181,55 @@ static void pcpu_schedule_balance_work(void)
 		schedule_work(&pcpu_balance_work);
 }
 
+/**
+ * pcpu_addr_in_first_chunk - address check for first chunk's dynamic region
+ * @addr: percpu address of interest
+ *
+ * The first chunk is considered to be the dynamic region of the first chunk.
+ * While the true first chunk is composed of the static, dynamic, and
+ * reserved regions, it is the chunk that serves the dynamic region that is
+ * circulated in the chunk slots.
+ *
+ * The reserved chunk has a separate check and the static region addresses
+ * should never be passed into the percpu allocator.
+ *
+ * RETURNS:
+ * True if the address is in the dynamic region of the first chunk.
+ */
 static bool pcpu_addr_in_first_chunk(void *addr)
 {
-	void *first_start = pcpu_first_chunk->base_addr;
+	void *start_addr = pcpu_first_chunk->base_addr +
+			   pcpu_first_chunk->start_offset;
+	void *end_addr = pcpu_first_chunk->base_addr +
+			 pcpu_first_chunk->nr_pages * PAGE_SIZE -
+			 pcpu_first_chunk->end_offset;
 
-	return addr >= first_start && addr < first_start + pcpu_unit_size;
+	return addr >= start_addr && addr < end_addr;
 }
 
+/**
+ * pcpu_addr_in_reserved_chunk - address check for reserved region
+ *
+ * The reserved region is a part of the first chunk and primarily serves
+ * static percpu variables from kernel modules.
+ *
+ * RETURNS:
+ * True if the address is in the reserved region.
+ */
 static bool pcpu_addr_in_reserved_chunk(void *addr)
 {
-	void *first_start = pcpu_first_chunk->base_addr;
+	void *start_addr, *end_addr;
 
-	return addr >= first_start &&
-	       addr < first_start + pcpu_first_chunk->start_offset;
+	if (!pcpu_reserved_chunk)
+		return false;
+
+	start_addr = pcpu_reserved_chunk->base_addr +
+		     pcpu_reserved_chunk->start_offset;
+	end_addr = pcpu_reserved_chunk->base_addr +
+		   pcpu_reserved_chunk->nr_pages * PAGE_SIZE -
+		   pcpu_reserved_chunk->end_offset;
+
+	return addr >= start_addr && addr < end_addr;
 }
 
 static int __pcpu_size_to_slot(int size)
@@ -234,11 +270,16 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
 	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
 }
 
+static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx)
+{
+	return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT);
+}
+
 static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 				     unsigned int cpu, int page_idx)
 {
-	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
-		(page_idx << PAGE_SHIFT);
+	return (unsigned long)chunk->base_addr +
+	       pcpu_unit_page_offset(cpu, page_idx);
 }
 
 static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk,
@@ -708,23 +749,34 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
 	pcpu_chunk_relocate(chunk, oslot);
 }
 
-static struct pcpu_chunk * __init pcpu_alloc_first_chunk(void *base_addr,
-							 int start_offset,
+static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 							 int map_size,
 							 int *map,
 							 int init_map_size)
 {
 	struct pcpu_chunk *chunk;
-	int region_size;
+	unsigned long aligned_addr;
+	int start_offset, region_size;
+
+	/* region calculations */
+	aligned_addr = tmp_addr & PAGE_MASK;
 
+	start_offset = tmp_addr - aligned_addr;
 	region_size = PFN_ALIGN(start_offset + map_size);
 
+	/* allocate chunk */
 	chunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 
 	INIT_LIST_HEAD(&chunk->list);
 	INIT_LIST_HEAD(&chunk->map_extend_list);
-	chunk->base_addr = base_addr;
+
+	chunk->base_addr = (void *)aligned_addr;
 	chunk->start_offset = start_offset;
 	chunk->end_offset = region_size - chunk->start_offset - map_size;
+
+	chunk->nr_pages = pcpu_unit_pages;
+
 	chunk->map = map;
 	chunk->map_alloc = init_map_size;
 
@@ -734,10 +786,17 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(void *base_addr,
 	chunk->nr_populated = pcpu_unit_pages;
 
 	chunk->contig_hint = chunk->free_size = map_size;
-	chunk->map[0] = 1;
-	chunk->map[1] = chunk->start_offset;
-	chunk->map[2] = (chunk->start_offset + chunk->free_size) | 1;
-	chunk->map_used = 2;
+
+	if (chunk->start_offset) {
+		/* hide the beginning of the bitmap */
+		chunk->map[0] = 1;
+		chunk->map[1] = chunk->start_offset;
+		chunk->map_used = 1;
+	}
+
+	/* set chunk's free region */
+	chunk->map[++chunk->map_used] =
+		(chunk->start_offset + chunk->free_size) | 1;
 
 	if (chunk->end_offset) {
 		/* hide the end of the bitmap */
@@ -772,6 +831,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->free_size = pcpu_unit_size;
 	chunk->contig_hint = pcpu_unit_size;
 
+	chunk->nr_pages = pcpu_unit_pages;
+
 	return chunk;
 }
@@ -859,18 +920,21 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
  * pcpu_chunk_addr_search - determine chunk containing specified address
  * @addr: address for which the chunk needs to be determined.
  *
+ * This is an internal function that handles all but static allocations.
+ * Static percpu address values should never be passed into the allocator.
+ *
  * RETURNS:
  * The address of the found chunk.
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-	/* is it in the first chunk? */
-	if (pcpu_addr_in_first_chunk(addr)) {
-		/* is it in the reserved area? */
-		if (pcpu_addr_in_reserved_chunk(addr))
-			return pcpu_reserved_chunk;
-		return pcpu_first_chunk;
-	}
+	/* is it in the dynamic region (first chunk)? */
+	if (pcpu_addr_in_first_chunk(addr))
+		return pcpu_first_chunk;
+
+	/* is it in the reserved region? */
+	if (pcpu_addr_in_reserved_chunk(addr))
+		return pcpu_reserved_chunk;
 
 	/*
 	 * The address is relative to unit0 which might be unused and
@@ -1401,10 +1465,16 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
+	 *
+	 * The address check is against full chunk sizes.  pcpu_base_addr
+	 * points to the beginning of the first chunk including the
+	 * static region.  Assumes good intent as the first chunk may
+	 * not be full (ie. < pcpu_unit_pages in size).
 	 */
-	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
-	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
-				     pcpu_unit_pages);
+	first_low = (unsigned long)pcpu_base_addr +
+		    pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
+	first_high = (unsigned long)pcpu_base_addr +
+		     pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
 	if ((unsigned long)addr >= first_low &&
 	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
@@ -1586,12 +1656,13 @@ static void pcpu_dump_alloc_info(const char *lvl,
  * The caller should have mapped the first chunk at @base_addr and
  * copied static data to each unit.
  *
- * If the first chunk ends up with both reserved and dynamic areas, it
- * is served by two chunks - one to serve the core static and reserved
- * areas and the other for the dynamic area.  They share the same vm
- * and page map but uses different area allocation map to stay away
- * from each other.  The latter chunk is circulated in the chunk slots
- * and available for dynamic allocation like any other chunks.
+ * The first chunk will always contain a static and a dynamic region.
+ * However, the static region is not managed by any chunk.  If the first
+ * chunk also contains a reserved region, it is served by two chunks -
+ * one for the reserved region and one for the dynamic region.  They
+ * share the same vm, but use offset regions in the area allocation map.
+ * The chunk serving the dynamic region is circulated in the chunk slots
+ * and available for dynamic allocation like any other chunk.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
@@ -1609,7 +1680,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	unsigned int cpu;
 	int *unit_map;
 	int group, unit, i;
-	int map_size, start_offset;
+	int map_size;
+	unsigned long tmp_addr;
 
 #define PCPU_SETUP_BUG_ON(cond)	do {					\
 	if (unlikely(cond)) {						\
@@ -1712,25 +1784,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
 	/*
-	 * Initialize static chunk.  If reserved_size is zero, the
-	 * static chunk covers static area + dynamic allocation area
-	 * in the first chunk.  If reserved_size is not zero, it
-	 * covers static area + reserved area (mostly used for module
-	 * static percpu allocation).
+	 * Initialize first chunk.
+	 * If the reserved_size is non-zero, this initializes the reserved
+	 * chunk.  If the reserved_size is zero, the reserved chunk is NULL
+	 * and the dynamic region is initialized here.  The first chunk,
+	 * pcpu_first_chunk, will always point to the chunk that serves
+	 * the dynamic region.
 	 */
-	start_offset = ai->static_size;
+	tmp_addr = (unsigned long)base_addr + ai->static_size;
 	map_size = ai->reserved_size ?: ai->dyn_size;
-	chunk = pcpu_alloc_first_chunk(base_addr, start_offset, map_size, smap,
-				       ARRAY_SIZE(smap));
+	chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, smap,
+				       ARRAY_SIZE(smap));
 
 	/* init dynamic chunk if necessary */
 	if (ai->reserved_size) {
 		pcpu_reserved_chunk = chunk;
 
-		start_offset = ai->static_size + ai->reserved_size;
+		tmp_addr = (unsigned long)base_addr + ai->static_size +
+			   ai->reserved_size;
 		map_size = ai->dyn_size;
-		chunk = pcpu_alloc_first_chunk(base_addr, start_offset,
-					       map_size, dmap,
-					       ARRAY_SIZE(dmap));
+		chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, dmap,
+					       ARRAY_SIZE(dmap));
 	}