Commit 6e768f06 authored by Sebastian Sanchez, committed by Doug Ledford

IB/hfi1: Optimize devdata cachelines

Profiling shows hot path struct members that need
to be in a minimum set of cachelines.

Group these struct members in the same cacheline:
	sc2vl_lock
	sc2vl
	rhf_rcv_function_map
	rcv_limit
	rhf_offset

Group these struct members in the same cacheline:
	process_pio_send
	process_dma_send
	pport
	rcd
	int_counter
	flags
	num_pports
	first_user_ctxt

Fill holes in struct hfi1_devdata revealed by pahole.
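For reference, a minimal userspace sketch (not hfi1 driver code) of the grouping technique: placing a cacheline-aligned attribute on the first member of each hot group forces that group to start on its own cacheline, and offsetof() can confirm the resulting layout much as pahole does. The 64-byte line size, the struct name, and the small field subset below are assumptions chosen only for illustration.

/*
 * Minimal sketch of the cacheline-grouping technique.  Members that
 * are read together on the hot path start at a cacheline-aligned
 * offset so they share as few cachelines as possible.
 */
#include <stddef.h>
#include <stdio.h>

#define CACHELINE_BYTES 64
/* userspace stand-in for the kernel's ____cacheline_aligned_in_smp */
#define cacheline_aligned __attribute__((aligned(CACHELINE_BYTES)))

struct devdata_sketch {
	long cold_a;	/* rarely touched members stay in earlier lines */
	long cold_b;

	/* hot group 1: starts on its own cacheline */
	void *process_pio_send cacheline_aligned;
	void *process_dma_send;
	void *pport;
	void *rcd;
	unsigned short flags;
	unsigned char num_pports;
	unsigned char first_user_ctxt;

	/* hot group 2: forced onto the next cacheline */
	unsigned long sc2vl[4] cacheline_aligned;
	void *rhf_rcv_function_map;
	unsigned short rhf_offset;
};

int main(void)
{
	/* report offsets the way pahole would, to verify the grouping */
	printf("group 1 starts at %zu (cacheline %zu)\n",
	       offsetof(struct devdata_sketch, process_pio_send),
	       offsetof(struct devdata_sketch, process_pio_send) / CACHELINE_BYTES);
	printf("group 2 starts at %zu (cacheline %zu)\n",
	       offsetof(struct devdata_sketch, sc2vl),
	       offsetof(struct devdata_sketch, sc2vl) / CACHELINE_BYTES);
	return 0;
}

On a typical 64-bit build this prints each hot group starting on its own 64-byte boundary; for the real struct, running pahole -C hfi1_devdata against the built hfi1 module shows the actual offsets and any remaining holes.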
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent a6cd5f08
@@ -852,32 +852,29 @@ struct hfi1_devdata {
 	u8 __iomem *kregend;
 	/* physical address of chip for io_remap, etc. */
 	resource_size_t physaddr;
-	/* receive context data */
-	struct hfi1_ctxtdata **rcd;
+	/* Per VL data. Enough for all VLs but not all elements are set/used. */
+	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
 	/* send context data */
 	struct send_context_info *send_contexts;
 	/* map hardware send contexts to software index */
 	u8 *hw_to_sw;
 	/* spinlock for allocating and releasing send context resources */
 	spinlock_t sc_lock;
-	/* Per VL data. Enough for all VLs but not all elements are set/used. */
-	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
 	/* lock for pio_map */
 	spinlock_t pio_map_lock;
+	/* Send Context initialization lock. */
+	spinlock_t sc_init_lock;
+	/* lock for sdma_map */
+	spinlock_t sde_map_lock;
 	/* array of kernel send contexts */
 	struct send_context **kernel_send_context;
 	/* array of vl maps */
 	struct pio_vl_map __rcu *pio_map;
-	/* seqlock for sc2vl */
-	seqlock_t sc2vl_lock;
-	u64 sc2vl[4];
-	/* Send Context initialization lock. */
-	spinlock_t sc_init_lock;
+	/* default flags to last descriptor */
+	u64 default_desc1;
 	/* fields common to all SDMA engines */
-	/* default flags to last descriptor */
-	u64 default_desc1;
 	volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
 	dma_addr_t sdma_heads_phys;
 	void *sdma_pad_dma; /* DMA'ed by chip */
@@ -888,8 +885,6 @@ struct hfi1_devdata {
 	u32 chip_sdma_engines;
 	/* num used */
 	u32 num_sdma;
-	/* lock for sdma_map */
-	spinlock_t sde_map_lock;
 	/* array of engines sized by num_sdma */
 	struct sdma_engine *per_sdma;
 	/* array of vl maps */
@@ -898,14 +893,11 @@ struct hfi1_devdata {
 	wait_queue_head_t sdma_unfreeze_wq;
 	atomic_t sdma_unfreeze_count;
+	u32 lcb_access_count; /* count of LCB users */
 	/* common data between shared ASIC HFIs in this OS */
 	struct hfi1_asic_data *asic_data;
-	/* hfi1_pportdata, points to array of (physical) port-specific
-	 * data structs, indexed by pidx (0..n-1)
-	 */
-	struct hfi1_pportdata *pport;
 	/* mem-mapped pointer to base of PIO buffers */
 	void __iomem *piobase;
 	/*
@@ -922,20 +914,13 @@ struct hfi1_devdata {
 	/* send context numbers and sizes for each type */
 	struct sc_config_sizes sc_sizes[SC_MAX];
-	u32 lcb_access_count; /* count of LCB users */
 	char *boardname; /* human readable board info */
-	/* device (not port) flags, basically device capabilities */
-	u32 flags;
 	/* reset value */
 	u64 z_int_counter;
 	u64 z_rcv_limit;
 	u64 z_send_schedule;
-	/* percpu int_counter */
-	u64 __percpu *int_counter;
-	u64 __percpu *rcv_limit;
 	u64 __percpu *send_schedule;
 	/* number of receive contexts in use by the driver */
 	u32 num_rcv_contexts;
@@ -950,6 +935,7 @@ struct hfi1_devdata {
 	/* base receive interrupt timeout, in CSR units */
 	u32 rcv_intr_timeout_csr;
+	u32 freezelen; /* max length of freezemsg */
 	u64 __iomem *egrtidbase;
 	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
 	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -971,7 +957,6 @@ struct hfi1_devdata {
 	 * IB link status cheaply
 	 */
 	struct hfi1_status *status;
-	u32 freezelen; /* max length of freezemsg */
 	/* revision register shadow */
 	u64 revision;
@@ -999,6 +984,8 @@ struct hfi1_devdata {
 	u16 rcvegrbufsize_shift;
 	/* both sides of the PCIe link are gen3 capable */
 	u8 link_gen3_capable;
+	/* default link down value (poll/sleep) */
+	u8 link_default;
 	/* localbus width (1, 2,4,8,16,32) from config space */
 	u32 lbus_width;
 	/* localbus speed in MHz */
@@ -1034,8 +1021,6 @@ struct hfi1_devdata {
 	u8 hfi1_id;
 	/* implementation code */
 	u8 icode;
-	/* default link down value (poll/sleep) */
-	u8 link_default;
 	/* vAU of this device */
 	u8 vau;
 	/* vCU of this device */
@@ -1046,27 +1031,17 @@ struct hfi1_devdata {
 	u16 vl15_init;
 	/* Misc small ints */
-	/* Number of physical ports available */
-	u8 num_pports;
-	/* Lowest context number which can be used by user processes */
-	u8 first_user_ctxt;
 	u8 n_krcv_queues;
 	u8 qos_shift;
-	u8 qpn_mask;
-	u16 rhf_offset; /* offset of RHF within receive header entry */
 	u16 irev; /* implementation revision */
 	u16 dc8051_ver; /* 8051 firmware version */
+	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
 	struct platform_config platform_config;
 	struct platform_config_cache pcfg_cache;
 	struct diag_client *diag_client;
-	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
-	u8 psxmitwait_supported;
-	/* cycle length of PS* counters in HW (in picoseconds) */
-	u16 psxmitwait_check_rate;
 	/* MSI-X information */
 	struct hfi1_msix_entry *msix_entries;
@@ -1081,6 +1056,9 @@ struct hfi1_devdata {
 	struct rcv_array_data rcv_entries;
+	/* cycle length of PS* counters in HW (in picoseconds) */
+	u16 psxmitwait_check_rate;
 	/*
 	 * 64 bit synthetic counters
 	 */
@@ -1113,11 +1091,11 @@ struct hfi1_devdata {
 	struct err_info_rcvport err_info_rcvport;
 	struct err_info_constraint err_info_rcv_constraint;
 	struct err_info_constraint err_info_xmit_constraint;
-	u8 err_info_uncorrectable;
-	u8 err_info_fmconfig;
 	atomic_t drop_packet;
 	u8 do_drop;
+	u8 err_info_uncorrectable;
+	u8 err_info_fmconfig;
 	/*
 	 * Software counters for the status bits defined by the
@@ -1140,47 +1118,71 @@ struct hfi1_devdata {
 	u64 sw_cce_err_status_aggregate;
 	/* Software counter that aggregates all bypass packet rcv errors */
 	u64 sw_rcv_bypass_packet_errors;
-	/* receive interrupt functions */
-	rhf_rcv_function_ptr *rhf_rcv_function_map;
+	/* receive interrupt function */
 	rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
+	/* Save the enabled LCB error bits */
+	u64 lcb_err_en;
 	/*
 	 * Handlers for outgoing data so that snoop/capture does not
 	 * have to have its hooks in the send path
 	 */
-	send_routine process_pio_send;
+	send_routine process_pio_send ____cacheline_aligned_in_smp;
 	send_routine process_dma_send;
 	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 				u64 pbc, const void *from, size_t count);
+	/* hfi1_pportdata, points to array of (physical) port-specific
+	 * data structs, indexed by pidx (0..n-1)
+	 */
+	struct hfi1_pportdata *pport;
+	/* receive context data */
+	struct hfi1_ctxtdata **rcd;
+	u64 __percpu *int_counter;
+	/* device (not port) flags, basically device capabilities */
+	u16 flags;
+	/* Number of physical ports available */
+	u8 num_pports;
+	/* Lowest context number which can be used by user processes */
+	u8 first_user_ctxt;
+	/* adding a new field here would make it part of this cacheline */
+	/* seqlock for sc2vl */
+	seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
+	u64 sc2vl[4];
+	/* receive interrupt functions */
+	rhf_rcv_function_ptr *rhf_rcv_function_map;
+	u64 __percpu *rcv_limit;
+	u16 rhf_offset; /* offset of RHF within receive header entry */
+	/* adding a new field here would make it part of this cacheline */
 	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
 	u8 oui1;
 	u8 oui2;
 	u8 oui3;
+	u8 dc_shutdown;
 	/* Timer and counter used to detect RcvBufOvflCnt changes */
 	struct timer_list rcverr_timer;
-	u32 rcv_ovfl_cnt;
 	wait_queue_head_t event_queue;
-	/* Save the enabled LCB error bits */
-	u64 lcb_err_en;
-	u8 dc_shutdown;
 	/* receive context tail dummy address */
 	__le64 *rcvhdrtail_dummy_kvaddr;
 	dma_addr_t rcvhdrtail_dummy_dma;
-	bool eprom_available; /* true if EPROM is available for this device */
-	bool aspm_supported; /* Does HW support ASPM */
-	bool aspm_enabled; /* ASPM state: enabled/disabled */
+	u32 rcv_ovfl_cnt;
 	/* Serialize ASPM enable/disable between multiple verbs contexts */
 	spinlock_t aspm_lock;
 	/* Number of verbs contexts which have disabled ASPM */
 	atomic_t aspm_disabled_cnt;
 	struct hfi1_affinity *affinity;
+	bool eprom_available; /* true if EPROM is available for this device */
+	bool aspm_supported; /* Does HW support ASPM */
+	bool aspm_enabled; /* ASPM state: enabled/disabled */
 	struct rhashtable sdma_rht;
 	struct kobject kobj;
 };