• Eric Dumazet's avatar
    net_sched: sch_fq: struct sched_data reorg · 54ff8ad6
    Eric Dumazet authored
    q->flows can be modified often, while q->timer_slack is read-mostly.
    
    Exchange the two fields, so that the cache line containing
    quantum, initial_quantum, and other critical parameters
    stays clean (read-mostly).
    
    Move q->watchdog next to q->stat_throttled
    
    Add comments explaining how the structure is split into
    three different parts.
    
    pahole output before the patch:
    
    struct fq_sched_data {
    	struct fq_flow_head        new_flows;            /*     0  0x10 */
    	struct fq_flow_head        old_flows;            /*  0x10  0x10 */
    	struct rb_root             delayed;              /*  0x20   0x8 */
    	u64                        time_next_delayed_flow; /*  0x28   0x8 */
    	u64                        ktime_cache;          /*  0x30   0x8 */
    	unsigned long              unthrottle_latency_ns; /*  0x38   0x8 */
    	/* --- cacheline 1 boundary (64 bytes) --- */
    	struct fq_flow             internal __attribute__((__aligned__(64))); /*  0x40  0x80 */
    
    	/* XXX last struct has 16 bytes of padding */
    
    	/* --- cacheline 3 boundary (192 bytes) --- */
    	u32                        quantum;              /*  0xc0   0x4 */
    	u32                        initial_quantum;      /*  0xc4   0x4 */
    	u32                        flow_refill_delay;    /*  0xc8   0x4 */
    	u32                        flow_plimit;          /*  0xcc   0x4 */
    	unsigned long              flow_max_rate;        /*  0xd0   0x8 */
    	u64                        ce_threshold;         /*  0xd8   0x8 */
    	u64                        horizon;              /*  0xe0   0x8 */
    	u32                        orphan_mask;          /*  0xe8   0x4 */
    	u32                        low_rate_threshold;   /*  0xec   0x4 */
    	struct rb_root *           fq_root;              /*  0xf0   0x8 */
    	u8                         rate_enable;          /*  0xf8   0x1 */
    	u8                         fq_trees_log;         /*  0xf9   0x1 */
    	u8                         horizon_drop;         /*  0xfa   0x1 */
    
    	/* XXX 1 byte hole, try to pack */
    
    <bad>	u32                        flows;                /*  0xfc   0x4 */
    	/* --- cacheline 4 boundary (256 bytes) --- */
    	u32                        inactive_flows;       /* 0x100   0x4 */
    	u32                        throttled_flows;      /* 0x104   0x4 */
    	u64                        stat_gc_flows;        /* 0x108   0x8 */
    	u64                        stat_internal_packets; /* 0x110   0x8 */
    	u64                        stat_throttled;       /* 0x118   0x8 */
    	u64                        stat_ce_mark;         /* 0x120   0x8 */
    	u64                        stat_horizon_drops;   /* 0x128   0x8 */
    	u64                        stat_horizon_caps;    /* 0x130   0x8 */
    	u64                        stat_flows_plimit;    /* 0x138   0x8 */
    	/* --- cacheline 5 boundary (320 bytes) --- */
    	u64                        stat_pkts_too_long;   /* 0x140   0x8 */
    	u64                        stat_allocation_errors; /* 0x148   0x8 */
    <bad>	u32                        timer_slack;          /* 0x150   0x4 */
    
    	/* XXX 4 bytes hole, try to pack */
    
    	struct qdisc_watchdog      watchdog;             /* 0x158  0x48 */
    
    	/* size: 448, cachelines: 7, members: 34 */
    	/* sum members: 411, holes: 2, sum holes: 5 */
    	/* padding: 32 */
    	/* paddings: 1, sum paddings: 16 */
    	/* forced alignments: 1 */
    };
    
    pahole output after the patch:
    
    struct fq_sched_data {
    	struct fq_flow_head        new_flows;            /*     0  0x10 */
    	struct fq_flow_head        old_flows;            /*  0x10  0x10 */
    	struct rb_root             delayed;              /*  0x20   0x8 */
    	u64                        time_next_delayed_flow; /*  0x28   0x8 */
    	u64                        ktime_cache;          /*  0x30   0x8 */
    	unsigned long              unthrottle_latency_ns; /*  0x38   0x8 */
    	/* --- cacheline 1 boundary (64 bytes) --- */
    	struct fq_flow             internal __attribute__((__aligned__(64))); /*  0x40  0x80 */
    
    	/* XXX last struct has 16 bytes of padding */
    
    	/* --- cacheline 3 boundary (192 bytes) --- */
    	u32                        quantum;              /*  0xc0   0x4 */
    	u32                        initial_quantum;      /*  0xc4   0x4 */
    	u32                        flow_refill_delay;    /*  0xc8   0x4 */
    	u32                        flow_plimit;          /*  0xcc   0x4 */
    	unsigned long              flow_max_rate;        /*  0xd0   0x8 */
    	u64                        ce_threshold;         /*  0xd8   0x8 */
    	u64                        horizon;              /*  0xe0   0x8 */
    	u32                        orphan_mask;          /*  0xe8   0x4 */
    	u32                        low_rate_threshold;   /*  0xec   0x4 */
    	struct rb_root *           fq_root;              /*  0xf0   0x8 */
    	u8                         rate_enable;          /*  0xf8   0x1 */
    	u8                         fq_trees_log;         /*  0xf9   0x1 */
    	u8                         horizon_drop;         /*  0xfa   0x1 */
    
    	/* XXX 1 byte hole, try to pack */
    
    <good>	u32                        timer_slack;          /*  0xfc   0x4 */
    	/* --- cacheline 4 boundary (256 bytes) --- */
    <good>	u32                        flows;                /* 0x100   0x4 */
    	u32                        inactive_flows;       /* 0x104   0x4 */
    	u32                        throttled_flows;      /* 0x108   0x4 */
    
    	/* XXX 4 bytes hole, try to pack */
    
    	u64                        stat_throttled;       /* 0x110   0x8 */
    <better> struct qdisc_watchdog     watchdog;             /* 0x118  0x48 */
    	/* --- cacheline 5 boundary (320 bytes) was 32 bytes ago --- */
    	u64                        stat_gc_flows;        /* 0x160   0x8 */
    	u64                        stat_internal_packets; /* 0x168   0x8 */
    	u64                        stat_ce_mark;         /* 0x170   0x8 */
    	u64                        stat_horizon_drops;   /* 0x178   0x8 */
    	/* --- cacheline 6 boundary (384 bytes) --- */
    	u64                        stat_horizon_caps;    /* 0x180   0x8 */
    	u64                        stat_flows_plimit;    /* 0x188   0x8 */
    	u64                        stat_pkts_too_long;   /* 0x190   0x8 */
    	u64                        stat_allocation_errors; /* 0x198   0x8 */
    
    	/* Force padding: */
    	u64                        :64;
    	u64                        :64;
    	u64                        :64;
    	u64                        :64;
    
    	/* size: 448, cachelines: 7, members: 34 */
    	/* sum members: 411, holes: 2, sum holes: 5 */
    	/* padding: 32 */
    	/* paddings: 1, sum paddings: 16 */
    	/* forced alignments: 1 */
    };
    Signed-off-by: Eric Dumazet <edumazet@google.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
    54ff8ad6
sch_fq.c 26.8 KB