Commit bcdc6efa authored by David S. Miller

Merge branch 'net_proc_perf'

Jia He says:

====================
Reduce cache miss for snmp_fold_field

On a PowerPC server with a large number of CPUs (160), besides commit
a3a77372 ("net: Optimize snmp stat aggregation by walking all
the percpu data at once"), I observed several other snmp_fold_field
call sites which cause a high cache miss rate.

test source code:
================
My simple test case, which reads the procfs items endlessly:
/***********************************************************/
/*
 * Re-read the procfs file named by argv[1] over and over so that the
 * kernel-side show routine for that file runs continuously while perf
 * counts cache misses. LINELEN and the usual headers (<stdio.h>, <fcntl.h>,
 * <unistd.h>) are not part of this excerpt and must be supplied when
 * building it.
 *
 * Returns 0 after the final pass, -1 when no file name was given, -2 when
 * the file cannot be opened and -3 when reading or rewinding fails.
 */
int main(int argc, char **argv)
{
        char buf[LINELEN + 1] = { 0 };
        unsigned int i;
        ssize_t rdsize;
        int fd;

        if (argc <= 1) {
                printf("file name empty\n");
                return -1;
        }

        /* The test only reads, so do not ask for write access. */
        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                printf("open error\n");
                return -2;
        }

        /*
         * The bound 0xffffffff is an unsigned value; an unsigned counter
         * reaches it without relying on signed overflow.
         */
        for (i = 0; i < 0xffffffffu; i++) {
                /* Drain the file; the data itself is not interesting. */
                while ((rdsize = read(fd, buf, LINELEN)) > 0)
                        ;

                if (rdsize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
                        printf("read or seek error\n");
                        close(fd);
                        return -3;
                }
        }

        close(fd);
        return 0;
}
/**********************************************************/

compile and run:
================
gcc test.c -o test

perf stat -d -e cache-misses ./test /proc/net/snmp
perf stat -d -e cache-misses ./test /proc/net/snmp6
perf stat -d -e cache-misses ./test /proc/net/sctp/snmp
perf stat -d -e cache-misses ./test /proc/net/xfrm_stat

before the patch set:
====================
 Performance counter stats for 'system wide':

         355911097      cache-misses                                                 [40.08%]
        2356829300      L1-dcache-loads                                              [60.04%]
         355642645      L1-dcache-load-misses     #   15.09% of all L1-dcache hits   [60.02%]
         346544541      LLC-loads                                                    [59.97%]
            389763      LLC-load-misses           #    0.11% of all LL-cache hits    [40.02%]

       6.245162638 seconds time elapsed

After the patch set:
===================
 Performance counter stats for 'system wide':

         194992476      cache-misses                                                 [40.03%]
        6718051877      L1-dcache-loads                                              [60.07%]
         194871921      L1-dcache-load-misses     #    2.90% of all L1-dcache hits   [60.11%]
         187632232      LLC-loads                                                    [60.04%]
            464466      LLC-load-misses           #    0.25% of all LL-cache hits    [39.89%]

       6.868422769 seconds time elapsed
The L1-dcache load miss rate is reduced from about 15% to 2.9%.

changelog
=========
v6:
- correct v5
v5:
- order local variables from longest to shortest line
v4:
- move memset into one block of if statement in snmp6_seq_show_item
- remove the changes in netstat_seq_show, considering the stack usage is too large
v3:
- introduce generic interface (suggested by Marcelo Ricardo Leitner)
- use max_t instead of self defined macro (suggested by David Miller)
v2:
- fix bug in udplite statistics.
- snmp_seq_show is split into 2 parts
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fa140354 6d4a741c
...@@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o ...@@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
} }
#endif #endif
/*
 * For every possible CPU, add that CPU's 64-bit counter for each entry of
 * stats_list (a table terminated by an entry whose name is NULL) into
 * buff64[]. The CPUs form the outer loop, so all listed counters of one CPU
 * are read before moving on to the next CPU.
 *
 * The macro only adds to buff64[], so the caller zeroes the array first and
 * sizes it for at least as many elements as stats_list has named entries.
 * offset is handed unchanged to snmp_get_cpu_field64().
 *
 * Wrapped in do { } while (0) so that "macro(...);" is a single statement
 * and stays safe inside an unbraced if/else.
 */
#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \
do { \
	int i, c; \
	for_each_possible_cpu(c) { \
		for (i = 0; (stats_list)[i].name; i++) \
			(buff64)[i] += snmp_get_cpu_field64( \
					(mib_statistic), \
					c, (stats_list)[i].entry, \
					(offset)); \
	} \
} while (0)
/*
 * For every possible CPU, add that CPU's counter for each entry of
 * stats_list (a table terminated by an entry whose name is NULL) into
 * buff[]. The CPUs form the outer loop, so all listed counters of one CPU
 * are read before moving on to the next CPU.
 *
 * The macro only adds to buff[], so the caller zeroes the array first and
 * sizes it for at least as many elements as stats_list has named entries.
 *
 * Wrapped in do { } while (0) so that "macro(...);" is a single statement
 * and stays safe inside an unbraced if/else.
 */
#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \
do { \
	int i, c; \
	for_each_possible_cpu(c) { \
		for (i = 0; (stats_list)[i].name; i++) \
			(buff)[i] += snmp_get_cpu_field( \
					(mib_statistic), \
					c, (stats_list)[i].entry); \
	} \
} while (0)
void inet_get_local_port_range(struct net *net, int *low, int *high); void inet_get_local_port_range(struct net *net, int *low, int *high);
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
......
...@@ -46,6 +46,8 @@ ...@@ -46,6 +46,8 @@
#include <net/sock.h> #include <net/sock.h>
#include <net/raw.h> #include <net/raw.h>
/*
 * Element count of the scratch array in snmp_seq_show_tcp_udp(), which is
 * reused for the Tcp, Udp and UdpLite rows: the larger of the two MIB sizes.
 */
#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX)
/* /*
* Report socket allocation statistics [mea@utu.fi] * Report socket allocation statistics [mea@utu.fi]
*/ */
...@@ -356,22 +358,22 @@ static void icmp_put(struct seq_file *seq) ...@@ -356,22 +358,22 @@ static void icmp_put(struct seq_file *seq)
atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
for (i = 0; icmpmibmap[i].name != NULL; i++) for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name); seq_printf(seq, " In%s", icmpmibmap[i].name);
seq_puts(seq, " OutMsgs OutErrors"); seq_puts(seq, " OutMsgs OutErrors");
for (i = 0; icmpmibmap[i].name != NULL; i++) for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu", seq_printf(seq, "\nIcmp: %lu %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
for (i = 0; icmpmibmap[i].name != NULL; i++) for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu", seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index)); atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu", seq_printf(seq, " %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i = 0; icmpmibmap[i].name != NULL; i++) for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu", seq_printf(seq, " %lu",
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
} }
...@@ -379,14 +381,16 @@ static void icmp_put(struct seq_file *seq) ...@@ -379,14 +381,16 @@ static void icmp_put(struct seq_file *seq)
/* /*
* Called from the PROCfs module. This outputs /proc/net/snmp. * Called from the PROCfs module. This outputs /proc/net/snmp.
*/ */
static int snmp_seq_show(struct seq_file *seq, void *v) static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
{ {
int i;
struct net *net = seq->private; struct net *net = seq->private;
u64 buff64[IPSTATS_MIB_MAX];
int i;
seq_puts(seq, "Ip: Forwarding DefaultTTL"); memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64));
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_puts(seq, "Ip: Forwarding DefaultTTL");
for (i = 0; snmp4_ipstats_list[i].name; i++)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d", seq_printf(seq, "\nIp: %d %d",
...@@ -394,57 +398,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) ...@@ -394,57 +398,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
net->ipv4.sysctl_ip_default_ttl); net->ipv4.sysctl_ip_default_ttl);
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
seq_printf(seq, " %llu", net->mib.ip_statistics,
snmp_fold_field64(net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp));
snmp4_ipstats_list[i].entry, for (i = 0; snmp4_ipstats_list[i].name; i++)
offsetof(struct ipstats_mib, syncp))); seq_printf(seq, " %llu", buff64[i]);
icmp_put(seq); /* RFC 2011 compatibility */ return 0;
icmpmsg_put(seq); }
static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
{
unsigned long buff[TCPUDP_MIB_MAX];
struct net *net = seq->private;
int i;
memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
seq_puts(seq, "\nTcp:"); seq_puts(seq, "\nTcp:");
for (i = 0; snmp4_tcp_list[i].name != NULL; i++) for (i = 0; snmp4_tcp_list[i].name; i++)
seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_printf(seq, " %s", snmp4_tcp_list[i].name);
seq_puts(seq, "\nTcp:"); seq_puts(seq, "\nTcp:");
for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { snmp_get_cpu_field_batch(buff, snmp4_tcp_list,
net->mib.tcp_statistics);
for (i = 0; snmp4_tcp_list[i].name; i++) {
/* MaxConn field is signed, RFC 2012 */ /* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld", seq_printf(seq, " %ld", buff[i]);
snmp_fold_field(net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
else else
seq_printf(seq, " %lu", seq_printf(seq, " %lu", buff[i]);
snmp_fold_field(net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
} }
memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
snmp_get_cpu_field_batch(buff, snmp4_udp_list,
net->mib.udp_statistics);
seq_puts(seq, "\nUdp:"); seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++) for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_printf(seq, " %s", snmp4_udp_list[i].name);
seq_puts(seq, "\nUdp:"); seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++) for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %lu", seq_printf(seq, " %lu", buff[i]);
snmp_fold_field(net->mib.udp_statistics,
snmp4_udp_list[i].entry)); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
/* the UDP and UDP-Lite MIBs are the same */ /* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:"); seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++) snmp_get_cpu_field_batch(buff, snmp4_udp_list,
net->mib.udplite_statistics);
for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_printf(seq, " %s", snmp4_udp_list[i].name);
seq_puts(seq, "\nUdpLite:"); seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++) for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %lu", seq_printf(seq, " %lu", buff[i]);
snmp_fold_field(net->mib.udplite_statistics,
snmp4_udp_list[i].entry));
seq_putc(seq, '\n'); seq_putc(seq, '\n');
return 0; return 0;
} }
/*
 * seq_file show callback for /proc/net/snmp. Emits the Ip rows first, then
 * the ICMP rows via icmp_put() and icmpmsg_put(), then the Tcp, Udp and
 * UdpLite rows. Always returns 0.
 */
static int snmp_seq_show(struct seq_file *seq, void *v)
{
	/* Both show helpers always return 0, so their results are dropped. */
	(void)snmp_seq_show_ipstats(seq, v);

	/* RFC 2011 compatibility */
	icmp_put(seq);
	icmpmsg_put(seq);

	(void)snmp_seq_show_tcp_udp(seq, v);

	return 0;
}
static int snmp_seq_open(struct inode *inode, struct file *file) static int snmp_seq_open(struct inode *inode, struct file *file)
{ {
return single_open_net(inode, file, snmp_seq_show); return single_open_net(inode, file, snmp_seq_show);
...@@ -469,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v) ...@@ -469,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
struct net *net = seq->private; struct net *net = seq->private;
seq_puts(seq, "TcpExt:"); seq_puts(seq, "TcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++) for (i = 0; snmp4_net_list[i].name; i++)
seq_printf(seq, " %s", snmp4_net_list[i].name); seq_printf(seq, " %s", snmp4_net_list[i].name);
seq_puts(seq, "\nTcpExt:"); seq_puts(seq, "\nTcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++) for (i = 0; snmp4_net_list[i].name; i++)
seq_printf(seq, " %lu", seq_printf(seq, " %lu",
snmp_fold_field(net->mib.net_statistics, snmp_fold_field(net->mib.net_statistics,
snmp4_net_list[i].entry)); snmp4_net_list[i].entry));
seq_puts(seq, "\nIpExt:"); seq_puts(seq, "\nIpExt:");
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) for (i = 0; snmp4_ipextstats_list[i].name; i++)
seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_printf(seq, " %s", snmp4_ipextstats_list[i].name);
seq_puts(seq, "\nIpExt:"); seq_puts(seq, "\nIpExt:");
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) for (i = 0; snmp4_ipextstats_list[i].name; i++)
seq_printf(seq, " %llu", seq_printf(seq, " %llu",
snmp_fold_field64(net->mib.ip_statistics, snmp_fold_field64(net->mib.ip_statistics,
snmp4_ipextstats_list[i].entry, snmp4_ipextstats_list[i].entry,
......
...@@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) ...@@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void)
} }
static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
int items, int bytes) int bytes)
{ {
int i; int i;
int pad = bytes - sizeof(u64) * items; int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX;
BUG_ON(pad < 0); BUG_ON(pad < 0);
/* Use put_unaligned() because stats may not be aligned for u64. */ /* Use put_unaligned() because stats may not be aligned for u64. */
put_unaligned(items, &stats[0]); put_unaligned(ICMP6_MIB_MAX, &stats[0]);
for (i = 1; i < items; i++) for (i = 1; i < ICMP6_MIB_MAX; i++)
put_unaligned(atomic_long_read(&mib[i]), &stats[i]); put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
memset(&stats[items], 0, pad); memset(&stats[ICMP6_MIB_MAX], 0, pad);
} }
static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
...@@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, ...@@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
offsetof(struct ipstats_mib, syncp)); offsetof(struct ipstats_mib, syncp));
break; break;
case IFLA_INET6_ICMP6STATS: case IFLA_INET6_ICMP6STATS:
__snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes);
break; break;
} }
} }
......
...@@ -30,6 +30,11 @@ ...@@ -30,6 +30,11 @@
#include <net/transp_v6.h> #include <net/transp_v6.h>
#include <net/ipv6.h> #include <net/ipv6.h>
/* Largest of four values, each compared as u32. */
#define MAX4(a, b, c, d) \
max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
/*
 * Element count of the on-stack scratch arrays in snmp6_seq_show_item() and
 * snmp6_seq_show_item64(): the largest of the four listed MIB sizes.
 */
#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
IPSTATS_MIB_MAX, ICMP_MIB_MAX)
static int sockstat6_seq_show(struct seq_file *seq, void *v) static int sockstat6_seq_show(struct seq_file *seq, void *v)
{ {
struct net *net = seq->private; struct net *net = seq->private;
...@@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, ...@@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
atomic_long_t *smib, atomic_long_t *smib,
const struct snmp_mib *itemlist) const struct snmp_mib *itemlist)
{ {
unsigned long buff[SNMP_MIB_MAX];
int i; int i;
unsigned long val;
for (i = 0; itemlist[i].name; i++) { if (pcpumib) {
val = pcpumib ? memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
snmp_fold_field(pcpumib, itemlist[i].entry) :
atomic_long_read(smib + itemlist[i].entry); snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); for (i = 0; itemlist[i].name; i++)
seq_printf(seq, "%-32s\t%lu\n",
itemlist[i].name, buff[i]);
} else {
for (i = 0; itemlist[i].name; i++)
seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
atomic_long_read(smib + itemlist[i].entry));
} }
} }
/*
 * Print one "name<TAB>value" line per named entry of itemlist for a MIB
 * whose counters are 64-bit per-CPU values. The counters of all possible
 * CPUs are first summed into a local array in a single batch pass and then
 * printed from that array.
 *
 * @seq:      seq_file to print into
 * @mib:      per-CPU MIB to read
 * @itemlist: table of counters to show, terminated by a NULL name
 * @syncpoff: offset passed through to the 64-bit per-CPU read helper
 */
static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
				  const struct snmp_mib *itemlist, size_t syncpoff)
{
	u64 buff64[SNMP_MIB_MAX];
	int i;

	/*
	 * Clear the whole array. Sizing the memset by unsigned long would
	 * leave part of this u64 array uninitialised wherever unsigned long
	 * is narrower than u64, and the batch helper only adds to it.
	 */
	memset(buff64, 0, sizeof(buff64));

	snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
	for (i = 0; itemlist[i].name; i++)
		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
}
static int snmp6_seq_show(struct seq_file *seq, void *v) static int snmp6_seq_show(struct seq_file *seq, void *v)
......
...@@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { ...@@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = {
/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
static int sctp_snmp_seq_show(struct seq_file *seq, void *v) static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
{ {
unsigned long buff[SCTP_MIB_MAX];
struct net *net = seq->private; struct net *net = seq->private;
int i; int i;
for (i = 0; sctp_snmp_list[i].name != NULL; i++) memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX);
snmp_get_cpu_field_batch(buff, sctp_snmp_list,
net->sctp.sctp_statistics);
for (i = 0; sctp_snmp_list[i].name; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
snmp_fold_field(net->sctp.sctp_statistics, buff[i]);
sctp_snmp_list[i].entry));
return 0; return 0;
} }
......
...@@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { ...@@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = {
static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
{ {
unsigned long buff[LINUX_MIB_XFRMMAX];
struct net *net = seq->private; struct net *net = seq->private;
int i; int i;
memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
snmp_get_cpu_field_batch(buff, xfrm_mib_list,
net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++) for (i = 0; xfrm_mib_list[i].name; i++)
seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
snmp_fold_field(net->mib.xfrm_statistics, buff[i]);
xfrm_mib_list[i].entry));
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment