vmbus: put related per-cpu variable together

The hv_context structure had several arrays which were per-cpu and was allocating small structures (tasklet_struct). Instead use a single per-cpu array. Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com> Signed-off-by: K. Y. Srinivasan <kys@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

vmbus: put related per-cpu variable together
The hv_context structure had several arrays which were per-cpu and was allocating small structures (tasklet_struct). Instead use a single per-cpu array. Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com> Signed-off-by: K. Y. Srinivasan <kys@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
37cdd991 · Stephen Hemminger · Greg Kroah-Hartman · 51c6ce2a · 37cdd991 · 37cdd991
Commit 37cdd991 authored Feb 11, 2017 by Stephen Hemminger Committed by Greg Kroah-Hartman Feb 14, 2017
5 changed files
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -353,9 +353,10 @@ static void free_channel(struct vmbus_channel *channel)
 static void percpu_channel_enq(void *arg)
 {
 	struct vmbus_channel *channel = arg;
-	int cpu = smp_processor_id();
+	struct hv_per_cpu_context *hv_cpu
+		= this_cpu_ptr(hv_context.cpu_context);

-	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
+	list_add_tail(&channel->percpu_list, &hv_cpu->chan_list);
 }

 static void percpu_channel_deq(void *arg)
@@ -379,19 +380,21 @@ static void vmbus_release_relid(u32 relid)

 void hv_event_tasklet_disable(struct vmbus_channel *channel)
 {
-	struct tasklet_struct *tasklet;
-	tasklet = hv_context.event_dpc[channel->target_cpu];
-	tasklet_disable(tasklet);
+	struct hv_per_cpu_context *hv_cpu;
+
+	hv_cpu = per_cpu_ptr(hv_context.cpu_context, channel->target_cpu);
+	tasklet_disable(&hv_cpu->event_dpc);
 }

 void hv_event_tasklet_enable(struct vmbus_channel *channel)
 {
-	struct tasklet_struct *tasklet;
-	tasklet = hv_context.event_dpc[channel->target_cpu];
-	tasklet_enable(tasklet);
+	struct hv_per_cpu_context *hv_cpu;
+
+	hv_cpu = per_cpu_ptr(hv_context.cpu_context, channel->target_cpu);
+	tasklet_enable(&hv_cpu->event_dpc);

 	/* In case there is any pending event */
-	tasklet_schedule(tasklet);
+	tasklet_schedule(&hv_cpu->event_dpc);
 }

 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
@@ -726,9 +729,12 @@ static void vmbus_wait_for_unload(void)
 			break;

 		for_each_online_cpu(cpu) {
-			page_addr = hv_context.synic_message_page[cpu];
-			msg = (struct hv_message *)page_addr +
-				VMBUS_MESSAGE_SINT;
+			struct hv_per_cpu_context *hv_cpu
+				= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+			page_addr = hv_cpu->synic_message_page;
+			msg = (struct hv_message *)page_addr
+				+ VMBUS_MESSAGE_SINT;

 			message_type = READ_ONCE(msg->header.message_type);
 			if (message_type == HVMSG_NONE)
@@ -752,7 +758,10 @@ static void vmbus_wait_for_unload(void)
 	 * messages after we reconnect.
 	 */
 	for_each_online_cpu(cpu) {
-		page_addr = hv_context.synic_message_page[cpu];
+		struct hv_per_cpu_context *hv_cpu
+			= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+		page_addr = hv_cpu->synic_message_page;
 		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 		msg->header.message_type = HVMSG_NONE;
 	}

--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -93,12 +93,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
 	 * all the CPUs. This is needed for kexec to work correctly where
 	 * the CPU attempting to connect may not be CPU 0.
 	 */
-	if (version >= VERSION_WIN8_1) {
-		msg->target_vcpu = hv_context.vp_index[get_cpu()];
-		put_cpu();
-	} else {
+	if (version >= VERSION_WIN8_1)
+		msg->target_vcpu = hv_context.vp_index[smp_processor_id()];
+	else
 		msg->target_vcpu = 0;
-	}

 	/*
 	 * Add to list before we send the request since we may
@@ -269,12 +267,12 @@ void vmbus_disconnect(void)
 */
 static struct vmbus_channel *pcpu_relid2channel(u32 relid)
 {
-	struct vmbus_channel *channel;
+	struct hv_per_cpu_context *hv_cpu
+		= this_cpu_ptr(hv_context.cpu_context);
 	struct vmbus_channel *found_channel = NULL;
-	int cpu = smp_processor_id();
-	struct list_head *pcpu_head = &hv_context.percpu_list[cpu];
+	struct vmbus_channel *channel;

-	list_for_each_entry(channel, pcpu_head, percpu_list) {
+	list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) {
 		if (channel->offermsg.child_relid == relid) {
 			found_channel = channel;
 			break;
@@ -379,6 +377,7 @@ static void process_chn_event(u32 relid)
 */
 void vmbus_on_event(unsigned long data)
 {
+	struct hv_per_cpu_context *hv_cpu = (void *)data;
 	unsigned long *recv_int_page;
 	u32 maxbits, relid;

@@ -391,8 +390,7 @@ void vmbus_on_event(unsigned long data)
 		 * can be directly checked to get the id of the channel
 		 * that has the interrupt pending.
 		 */
-		int cpu = smp_processor_id();
-		void *page_addr = hv_context.synic_event_page[cpu];
+		void *page_addr = hv_cpu->synic_event_page;
 		union hv_synic_event_flags *event
 			= (union hv_synic_event_flags *)page_addr +
 						 VMBUS_MESSAGE_SINT;

--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -49,24 +49,13 @@ struct hv_context hv_context = {
 */
 int hv_init(void)
 {
-
-	memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
-	memset(hv_context.synic_message_page, 0,
-	       sizeof(void *) * NR_CPUS);
-	memset(hv_context.post_msg_page, 0,
-	       sizeof(void *) * NR_CPUS);
-	memset(hv_context.vp_index, 0,
-	       sizeof(int) * NR_CPUS);
-	memset(hv_context.event_dpc, 0,
-	       sizeof(void *) * NR_CPUS);
-	memset(hv_context.msg_dpc, 0,
-	       sizeof(void *) * NR_CPUS);
-	memset(hv_context.clk_evt, 0,
-	       sizeof(void *) * NR_CPUS);
-
 	if (!hv_is_hypercall_page_setup())
 		return -ENOTSUPP;

+	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
+	if (!hv_context.cpu_context)
+		return -ENOMEM;
+
 	return 0;
 }

@@ -79,25 +68,24 @@ int hv_post_message(union hv_connection_id connection_id,
 		  enum hv_message_type message_type,
 		  void *payload, size_t payload_size)
 {
-
 	struct hv_input_post_message *aligned_msg;
+	struct hv_per_cpu_context *hv_cpu;
 	u64 status;

 	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
 		return -EMSGSIZE;

-	aligned_msg = (struct hv_input_post_message *)
-			hv_context.post_msg_page[get_cpu()];
-
+	hv_cpu = get_cpu_ptr(hv_context.cpu_context);
+	aligned_msg = hv_cpu->post_msg_page;
 	aligned_msg->connectionid = connection_id;
 	aligned_msg->reserved = 0;
 	aligned_msg->message_type = message_type;
 	aligned_msg->payload_size = payload_size;
 	memcpy((void *)aligned_msg->payload, payload, payload_size);
+	put_cpu_ptr(hv_cpu);

 	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);

-	put_cpu();
 	return status & 0xFFFF;
 }

@@ -154,8 +142,6 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)

 int hv_synic_alloc(void)
 {
-	size_t size = sizeof(struct tasklet_struct);
-	size_t ced_size = sizeof(struct clock_event_device);
 	int cpu;

 	hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
@@ -166,53 +152,43 @@ int hv_synic_alloc(void)
 	}

 	for_each_present_cpu(cpu) {
-		hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
-		if (hv_context.event_dpc[cpu] == NULL) {
-			pr_err("Unable to allocate event dpc\n");
-			goto err;
-		}
-		tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
-
-		hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
-		if (hv_context.msg_dpc[cpu] == NULL) {
-			pr_err("Unable to allocate event dpc\n");
-			goto err;
-		}
-		tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);
-
-		hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
-		if (hv_context.clk_evt[cpu] == NULL) {
+		struct hv_per_cpu_context *hv_cpu
+			= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+		memset(hv_cpu, 0, sizeof(*hv_cpu));
+		tasklet_init(&hv_cpu->event_dpc,
+			     vmbus_on_event, (unsigned long) hv_cpu);
+		tasklet_init(&hv_cpu->msg_dpc,
+			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);
+
+		hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device),
+					  GFP_KERNEL);
+		if (hv_cpu->clk_evt == NULL) {
 			pr_err("Unable to allocate clock event device\n");
 			goto err;
 		}
+		hv_init_clockevent_device(hv_cpu->clk_evt, cpu);

-		hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);
-
-		hv_context.synic_message_page[cpu] =
+		hv_cpu->synic_message_page =
 			(void *)get_zeroed_page(GFP_ATOMIC);
-
-		if (hv_context.synic_message_page[cpu] == NULL) {
+		if (hv_cpu->synic_message_page == NULL) {
 			pr_err("Unable to allocate SYNIC message page\n");
 			goto err;
 		}

-		hv_context.synic_event_page[cpu] =
-			(void *)get_zeroed_page(GFP_ATOMIC);
-
-		if (hv_context.synic_event_page[cpu] == NULL) {
+		hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
+		if (hv_cpu->synic_event_page == NULL) {
 			pr_err("Unable to allocate SYNIC event page\n");
 			goto err;
 		}

-		hv_context.post_msg_page[cpu] =
-			(void *)get_zeroed_page(GFP_ATOMIC);
-
-		if (hv_context.post_msg_page[cpu] == NULL) {
+		hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
+		if (hv_cpu->post_msg_page == NULL) {
 			pr_err("Unable to allocate post msg page\n");
 			goto err;
 		}

-		INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
+		INIT_LIST_HEAD(&hv_cpu->chan_list);
 	}

 	return 0;
@@ -220,26 +196,24 @@ int hv_synic_alloc(void)
 	return -ENOMEM;
 }

-static void hv_synic_free_cpu(int cpu)
-{
-	kfree(hv_context.event_dpc[cpu]);
-	kfree(hv_context.msg_dpc[cpu]);
-	kfree(hv_context.clk_evt[cpu]);
-	if (hv_context.synic_event_page[cpu])
-		free_page((unsigned long)hv_context.synic_event_page[cpu]);
-	if (hv_context.synic_message_page[cpu])
-		free_page((unsigned long)hv_context.synic_message_page[cpu]);
-	if (hv_context.post_msg_page[cpu])
-		free_page((unsigned long)hv_context.post_msg_page[cpu]);
-}

 void hv_synic_free(void)
 {
 	int cpu;

+	for_each_present_cpu(cpu) {
+		struct hv_per_cpu_context *hv_cpu
+			= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+		if (hv_cpu->synic_event_page)
+			free_page((unsigned long)hv_cpu->synic_event_page);
+		if (hv_cpu->synic_message_page)
+			free_page((unsigned long)hv_cpu->synic_message_page);
+		if (hv_cpu->post_msg_page)
+			free_page((unsigned long)hv_cpu->post_msg_page);
+	}
+
 	kfree(hv_context.hv_numa_map);
-	for_each_present_cpu(cpu)
-		hv_synic_free_cpu(cpu);
 }

 /*
@@ -251,6 +225,8 @@ void hv_synic_free(void)
 */
 int hv_synic_init(unsigned int cpu)
 {
+	struct hv_per_cpu_context *hv_cpu
+		= per_cpu_ptr(hv_context.cpu_context, cpu);
 	union hv_synic_simp simp;
 	union hv_synic_siefp siefp;
 	union hv_synic_sint shared_sint;
@@ -260,7 +236,7 @@ int hv_synic_init(unsigned int cpu)
 	/* Setup the Synic's message page */
 	hv_get_simp(simp.as_uint64);
 	simp.simp_enabled = 1;
-	simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
+	simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
 		>> PAGE_SHIFT;

 	hv_set_simp(simp.as_uint64);
@@ -268,7 +244,7 @@ int hv_synic_init(unsigned int cpu)
 	/* Setup the Synic's event page */
 	hv_get_siefp(siefp.as_uint64);
 	siefp.siefp_enabled = 1;
-	siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
+	siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
 		>> PAGE_SHIFT;

 	hv_set_siefp(siefp.as_uint64);
@@ -305,7 +281,7 @@ int hv_synic_init(unsigned int cpu)
 	 * Register the per-cpu clockevent source.
 	 */
 	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
-		clockevents_config_and_register(hv_context.clk_evt[cpu],
+		clockevents_config_and_register(hv_cpu->clk_evt,
 						HV_TIMER_FREQUENCY,
 						HV_MIN_DELTA_TICKS,
 						HV_MAX_MAX_DELTA_TICKS);
@@ -322,8 +298,12 @@ void hv_synic_clockevents_cleanup(void)
 	if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
 		return;

-	for_each_present_cpu(cpu)
-		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
+	for_each_present_cpu(cpu) {
+		struct hv_per_cpu_context *hv_cpu
+			= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+		clockevents_unbind_device(hv_cpu->clk_evt, cpu);
+	}
 }

 /*
@@ -372,8 +352,12 @@ int hv_synic_cleanup(unsigned int cpu)

 	/* Turn off clockevent device */
 	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) {
-		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
-		hv_ce_shutdown(hv_context.clk_evt[cpu]);
+		struct hv_per_cpu_context *hv_cpu
+			= this_cpu_ptr(hv_context.cpu_context);
+
+		clockevents_unbind_device(hv_cpu->clk_evt, cpu);
+		hv_ce_shutdown(hv_cpu->clk_evt);
+		put_cpu_ptr(hv_cpu);
 	}

 	hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT,

--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -29,6 +29,7 @@
 #include <asm/sync_bitops.h>
 #include <linux/atomic.h>
 #include <linux/hyperv.h>
+#include <linux/interrupt.h>

 /*
 * Timeout for services such as KVP and fcopy.
@@ -189,6 +190,33 @@ enum {
 	VMBUS_MESSAGE_SINT		= 2,
 };

+/*
+ * Per cpu state for channel handling
+ */
+struct hv_per_cpu_context {
+	void *synic_message_page;
+	void *synic_event_page;
+	/*
+	 * buffer to post messages to the host.
+	 */
+	void *post_msg_page;
+
+	/*
+	 * Starting with win8, we can take channel interrupts on any CPU;
+	 * we will manage the tasklet that handles events messages on a per CPU
+	 * basis.
+	 */
+	struct tasklet_struct event_dpc;
+	struct tasklet_struct msg_dpc;
+
+	/*
+	 * To optimize the mapping of relid to channel, maintain
+	 * per-cpu list of the channels based on their CPU affinity.
+	 */
+	struct list_head chan_list;
+	struct clock_event_device *clk_evt;
+};
+
 struct hv_context {
 	/* We only support running on top of Hyper-V
 	* So at this point this really can only contain the Hyper-V ID
@@ -199,8 +227,8 @@ struct hv_context {

 	bool synic_initialized;

-	void *synic_message_page[NR_CPUS];
-	void *synic_event_page[NR_CPUS];
+	struct hv_per_cpu_context __percpu *cpu_context;
+
 	/*
 	 * Hypervisor's notion of virtual processor ID is different from
 	 * Linux' notion of CPU ID. This information can only be retrieved
@@ -211,26 +239,7 @@ struct hv_context {
 	 * Linux cpuid 'a'.
 	 */
 	u32 vp_index[NR_CPUS];
-	/*
-	 * Starting with win8, we can take channel interrupts on any CPU;
-	 * we will manage the tasklet that handles events messages on a per CPU
-	 * basis.
-	 */
-	struct tasklet_struct *event_dpc[NR_CPUS];
-	struct tasklet_struct *msg_dpc[NR_CPUS];
-	/*
-	 * To optimize the mapping of relid to channel, maintain
-	 * per-cpu list of the channels based on their CPU affinity.
-	 */
-	struct list_head percpu_list[NR_CPUS];
-	/*
-	 * buffer to post messages to the host.
-	 */
-	void *post_msg_page[NR_CPUS];
-	/*
-	 * Support PV clockevent device.
-	 */
-	struct clock_event_device *clk_evt[NR_CPUS];
+
 	/*
 	 * To manage allocations in a NUMA node.
 	 * Array indexed by numa node ID.

--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -835,9 +835,10 @@ static void vmbus_onmessage_work(struct work_struct *work)
 	kfree(ctx);
 }

-static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
+static void hv_process_timer_expiration(struct hv_message *msg,
+					struct hv_per_cpu_context *hv_cpu)
 {
-	struct clock_event_device *dev = hv_context.clk_evt[cpu];
+	struct clock_event_device *dev = hv_cpu->clk_evt;

 	if (dev->event_handler)
 		dev->event_handler(dev);
@@ -847,8 +848,8 @@ static void hv_process_timer_expiration(struct hv_message *msg, int cpu)

 void vmbus_on_msg_dpc(unsigned long data)
 {
-	int cpu = smp_processor_id();
-	void *page_addr = hv_context.synic_message_page[cpu];
+	struct hv_per_cpu_context *hv_cpu = (void *)data;
+	void *page_addr = hv_cpu->synic_message_page;
 	struct hv_message *msg = (struct hv_message *)page_addr +
 				  VMBUS_MESSAGE_SINT;
 	struct vmbus_channel_message_header *hdr;
@@ -886,14 +887,14 @@ void vmbus_on_msg_dpc(unsigned long data)

 static void vmbus_isr(void)
 {
-	int cpu = smp_processor_id();
-	void *page_addr;
+	struct hv_per_cpu_context *hv_cpu
+		= this_cpu_ptr(hv_context.cpu_context);
+	void *page_addr = hv_cpu->synic_event_page;
 	struct hv_message *msg;
 	union hv_synic_event_flags *event;
 	bool handled = false;

-	page_addr = hv_context.synic_event_page[cpu];
-	if (page_addr == NULL)
+	if (unlikely(page_addr == NULL))
 		return;

 	event = (union hv_synic_event_flags *)page_addr +
@@ -921,18 +922,18 @@ static void vmbus_isr(void)
 	}

 	if (handled)
-		tasklet_schedule(hv_context.event_dpc[cpu]);
+		tasklet_schedule(&hv_cpu->event_dpc);


-	page_addr = hv_context.synic_message_page[cpu];
+	page_addr = hv_cpu->synic_message_page;
 	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

 	/* Check if there are actual msgs to be processed */
 	if (msg->header.message_type != HVMSG_NONE) {
 		if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
-			hv_process_timer_expiration(msg, cpu);
+			hv_process_timer_expiration(msg, hv_cpu);
 		else
-			tasklet_schedule(hv_context.msg_dpc[cpu]);
+			tasklet_schedule(&hv_cpu->msg_dpc);
 	}

 	add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
@@ -1521,9 +1522,14 @@ static void __exit vmbus_exit(void)
 	hv_synic_clockevents_cleanup();
 	vmbus_disconnect();
 	hv_remove_vmbus_irq();
-	for_each_online_cpu(cpu)
-		tasklet_kill(hv_context.msg_dpc[cpu]);
+	for_each_online_cpu(cpu) {
+		struct hv_per_cpu_context *hv_cpu
+			= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+		tasklet_kill(&hv_cpu->msg_dpc);
+	}
 	vmbus_free_channels();
+
 	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
 		unregister_die_notifier(&hyperv_die_block);
 		atomic_notifier_chain_unregister(&panic_notifier_list,
@@ -1531,7 +1537,10 @@ static void __exit vmbus_exit(void)
 	}
 	bus_unregister(&hv_bus);
 	for_each_online_cpu(cpu) {
-		tasklet_kill(hv_context.event_dpc[cpu]);
+		struct hv_per_cpu_context *hv_cpu
+			= per_cpu_ptr(hv_context.cpu_context, cpu);
+
+		tasklet_kill(&hv_cpu->event_dpc);
 	}
 	cpuhp_remove_state(hyperv_cpuhp_online);
 	hv_synic_free();