Commit 0210f859 authored by Russ Cox, committed by Quentin Smith

analysis/app: convert to new benchstat library

Change-Id: I80e47f05530260b09dd92a04ddbf4799731ee99c
Reviewed-on: https://go-review.googlesource.com/35944
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Quentin Smith <quentin@golang.org>
parent f01f51ba
...@@ -15,7 +15,7 @@ import ( ...@@ -15,7 +15,7 @@ import (
"strings" "strings"
"unicode" "unicode"
"golang.org/x/perf/analysis/internal/benchstat" "golang.org/x/perf/benchstat"
"golang.org/x/perf/storage/benchfmt" "golang.org/x/perf/storage/benchfmt"
"golang.org/x/perf/storage/query" "golang.org/x/perf/storage/query"
) )
...@@ -52,7 +52,7 @@ func (g *resultGroup) add(res *benchfmt.Result) { ...@@ -52,7 +52,7 @@ func (g *resultGroup) add(res *benchfmt.Result) {
} }
// splitOn returns a new set of groups sharing a common value for key. // splitOn returns a new set of groups sharing a common value for key.
func (g *resultGroup) splitOn(key string) []*resultGroup { func (g *resultGroup) splitOn(key string) ([]string, []*resultGroup) {
groups := make(map[string]*resultGroup) groups := make(map[string]*resultGroup)
var values []string var values []string
for _, res := range g.results { for _, res := range g.results {
...@@ -69,7 +69,7 @@ func (g *resultGroup) splitOn(key string) []*resultGroup { ...@@ -69,7 +69,7 @@ func (g *resultGroup) splitOn(key string) []*resultGroup {
for _, value := range values { for _, value := range values {
out = append(out, groups[value]) out = append(out, groups[value])
} }
return out return values, out
} }
// valueSet is a set of values and the number of results with each value. // valueSet is a set of values and the number of results with each value.
...@@ -264,19 +264,23 @@ func (a *App) compareQuery(q string) *compareData { ...@@ -264,19 +264,23 @@ func (a *App) compareQuery(q string) *compareData {
group := groups[0] group := groups[0]
// Matching a single upload with multiple files -> split by file // Matching a single upload with multiple files -> split by file
if len(group.LabelValues["upload"]) == 1 && len(group.LabelValues["upload-part"]) > 1 { if len(group.LabelValues["upload"]) == 1 && len(group.LabelValues["upload-part"]) > 1 {
groups = group.splitOn("upload-part") var values []string
values, groups = group.splitOn("upload-part")
q := make([]string, len(values))
for i, v := range values {
q[i] = "upload-part:" + v
}
queries = q
} }
} }
// Compute benchstat
var buf bytes.Buffer var buf bytes.Buffer
var results [][]*benchfmt.Result // Compute benchstat
for _, g := range groups { c := new(benchstat.Collection)
results = append(results, g.results) for i, g := range groups {
c.AddResults(queries[i], g.results)
} }
benchstat.Run(&buf, results, &benchstat.Options{ benchstat.FormatHTML(&buf, c.Tables())
HTML: true,
})
// Prepare struct for template. // Prepare struct for template.
labels := make(map[string]bool) labels := make(map[string]bool)
......
...@@ -39,13 +39,6 @@ td.count { ...@@ -39,13 +39,6 @@ td.count {
border-collapse: collapse; border-collapse: collapse;
border-bottom: 1px solid black; border-bottom: 1px solid black;
} }
table.benchstat {
border-collapse: collapse;
}
table.benchstat td, table.benchstat th {
padding-right: 2px;
padding-bottom: 2px;
}
#labels > tbody > tr:last-child > th, #labels > tbody > tr:last-child > td { #labels > tbody > tr:last-child > th, #labels > tbody > tr:last-child > td {
padding-bottom: 1em; padding-bottom: 1em;
} }
...@@ -66,6 +59,15 @@ table.benchstat td, table.benchstat th { ...@@ -66,6 +59,15 @@ table.benchstat td, table.benchstat th {
overflow: hidden; overflow: hidden;
text-overflow: ellipsis; text-overflow: ellipsis;
} }
.benchstat { border-collapse: collapse; }
.benchstat th:nth-child(1) { text-align: left; }
.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }
.benchstat tr:not(.configs) th { border-top: 1px solid #666; border-bottom: 1px solid #ccc; }
.benchstat .nodelta { text-align: center !important; }
.benchstat .better td.delta { font-weight: bold; }
.benchstat .worse td.delta { font-weight: bold; color: #c00; }
</style> </style>
</head> </head>
<body> <body>
......
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Benchstat computes and compares statistics about benchmarks.
//
// Each input file should contain the concatenated output of a number
// of runs of ``go test -bench.'' For each different benchmark listed in an input file,
// benchstat computes the mean, minimum, and maximum run time,
// after removing outliers using the interquartile range rule.
//
// If invoked on a single input file, benchstat prints the per-benchmark statistics
// for that file.
//
// If invoked on a pair of input files, benchstat adds to the output a column
// showing the statistics from the second file and a column showing the
// percent change in mean from the first to the second file.
// Next to the percent change, benchstat shows the p-value and sample
// sizes from a test of the two distributions of benchmark times.
// Small p-values indicate that the two distributions are significantly different.
// If the test indicates that there was no significant change between the two
// benchmarks (defined as p > 0.05), benchstat displays a single ~ instead of
// the percent change.
//
// The -delta-test option controls which significance test is applied:
// utest (Mann-Whitney U-test), ttest (two-sample Welch t-test), or none.
// The default is the U-test, sometimes also referred to as the Wilcoxon rank
// sum test.
//
// If invoked on more than two input files, benchstat prints the per-benchmark
// statistics for all the files, showing one column of statistics for each file,
// with no column for percent change or statistical significance.
//
// The -html option causes benchstat to print the results as an HTML table.
package benchstat
import (
"flag"
"fmt"
"html"
"io"
"strconv"
"strings"
"unicode/utf8"
"github.com/aclements/go-moremath/stats"
"golang.org/x/perf/storage/benchfmt"
)
// flagHTML registers an "html" boolean flag (default false) with the
// flag package.
// NOTE(review): nothing in the visible part of this file reads flagHTML;
// Run consults Options.HTML instead. Confirm whether any caller still
// depends on this flag before relying on it.
var (
flagHTML = flag.Bool("html", false, "print results as an HTML table")
)
// row is one line of an output table. cols holds the text of each cell,
// ordered from the leftmost column to the rightmost.
type row struct {
cols []string
}
// newRow returns a row whose cells are the given strings, left to right.
func newRow(cols ...string) *row {
	r := new(row)
	r.cols = cols
	return r
}
// add appends col to r as a new rightmost cell.
func (r *row) add(col string) {
	cells := append(r.cols, col)
	r.cols = cells
}
// trim removes empty cells from the right-hand end of r. Empty cells that
// are followed by a non-empty cell are kept.
func (r *row) trim() {
	end := len(r.cols)
	for end > 0 && r.cols[end-1] == "" {
		end--
	}
	r.cols = r.cols[:end]
}
// Options controls how Run analyzes results and renders its tables.
// Run fills in defaults for a nil DeltaTest and a zero Alpha.
type Options struct {
// DeltaTest is the significance test to apply to delta.
// This package provides Utest, Ttest, and Notest. If nil, Utest will be used.
DeltaTest func(old, new *Benchstat) (pval float64, err error)
// Alpha is the threshold to consider a change significant if p < α
// If zero, Run uses .05.
Alpha float64
// Geomean will add a row for the geometric mean of each file
Geomean bool
// HTML will print the results as an HTML table
HTML bool
}
// Run computes statistics for results and writes one table per unit to w.
//
// Each element of results is treated as one configuration (readGroup names
// them "#0", "#1", ...). With exactly two configurations each table has
// old, new, and delta columns; otherwise it has one column per
// configuration. Output is column-aligned text unless options.HTML is set,
// in which case each table is written as an HTML <table>.
//
// A nil options is treated as the zero Options. Note that the defaults for
// DeltaTest and Alpha are stored into the Options value the caller passed.
func Run(w io.Writer, results [][]*benchfmt.Result, options *Options) {
if options == nil {
options = &Options{}
}
if options.DeltaTest == nil {
options.DeltaTest = Utest
}
if options.Alpha == 0 {
options.Alpha = .05
}
// Read in benchmark data.
c := readResults(results)
for _, stat := range c.Stats {
stat.ComputeStats()
}
// Build the tables as rows of strings first; rendering happens below.
var tables [][]*row
switch len(c.Configs) {
case 2:
// Two configurations: old/new comparison with a delta column.
before, after := c.Configs[0], c.Configs[1]
key := BenchKey{}
// The range loops assign directly into the fields of key.
for _, key.Unit = range c.Units {
var table []*row
metric := metricOf(key.Unit)
for _, key.Benchmark = range c.Benchmarks {
key.Config = before
old := c.Stats[key]
key.Config = after
new := c.Stats[key]
// Skip benchmarks that are missing from either configuration.
if old == nil || new == nil {
continue
}
// Emit the header lazily so a unit with no comparable rows yields no table.
if len(table) == 0 {
table = append(table, newRow("name", "old "+metric, "new "+metric, "delta"))
}
pval, testerr := options.DeltaTest(old, new)
// Both columns are scaled by the old mean so they share a format.
scaler := newScaler(old.Mean, old.Unit)
// The delta cell starts as "~" and is replaced only for a significant change.
row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~ ")
if testerr == stats.ErrZeroVariance {
row.add("(zero variance)")
} else if testerr == stats.ErrSampleSize {
row.add("(too few samples)")
} else if testerr == stats.ErrSamplesEqual {
row.add("(all equal)")
} else if testerr != nil {
row.add(fmt.Sprintf("(%s)", testerr))
} else if pval < options.Alpha {
row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0)
}
// Still four cells means no error note was added; append the p-value
// note unless the test reported -1 (as Notest does).
if len(row.cols) == 4 && pval != -1 {
row.add(fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues)))
}
table = append(table, row)
}
if len(table) > 0 {
if options.Geomean {
table = addGeomean(table, c, key.Unit, true)
}
tables = append(tables, table)
}
}
default:
// Any other number of configurations: one statistics column per configuration.
key := BenchKey{}
for _, key.Unit = range c.Units {
var table []*row
metric := metricOf(key.Unit)
if len(c.Configs) > 1 {
hdr := newRow("name \\ " + metric)
for _, config := range c.Configs {
hdr.add(config)
}
table = append(table, hdr)
} else {
table = append(table, newRow("name", metric))
}
for _, key.Benchmark = range c.Benchmarks {
row := newRow(key.Benchmark)
// The scaler comes from the first configuration that has data for
// this benchmark and is reused for the rest of the row.
var scaler func(float64) string
for _, key.Config = range c.Configs {
stat := c.Stats[key]
if stat == nil {
row.add("")
continue
}
if scaler == nil {
scaler = newScaler(stat.Mean, stat.Unit)
}
row.add(stat.Format(scaler))
}
row.trim()
// Drop rows that have only the benchmark name left after trimming.
if len(row.cols) > 1 {
table = append(table, row)
}
}
if options.Geomean {
table = addGeomean(table, c, key.Unit, false)
}
tables = append(tables, table)
}
}
// Find the widest row so max can hold a width for every column.
numColumn := 0
for _, table := range tables {
for _, row := range table {
if numColumn < len(row.cols) {
numColumn = len(row.cols)
}
}
}
// max[i] is the widest cell (in runes) in column i across all tables.
max := make([]int, numColumn)
for _, table := range tables {
for _, row := range table {
for i, s := range row.cols {
n := utf8.RuneCountInString(s)
if max[i] < n {
max[i] = n
}
}
}
}
for i, table := range tables {
if i > 0 {
fmt.Fprintf(w, "\n")
}
if options.HTML {
// The <style> element is written once per table.
fmt.Fprintf(w, "<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>\n")
fmt.Fprintf(w, "<table class='benchstat'>\n")
// printRow writes one <tr> with every cell HTML-escaped; no closing </tr> is written.
printRow := func(row *row, tag string) {
fmt.Fprintf(w, "<tr>")
for _, cell := range row.cols {
fmt.Fprintf(w, "<%s>%s</%s>", tag, html.EscapeString(cell), tag)
}
fmt.Fprintf(w, "\n")
}
printRow(table[0], "th")
for _, row := range table[1:] {
printRow(row, "td")
}
fmt.Fprintf(w, "</table>\n")
continue
}
// headings
row := table[0]
for i, s := range row.cols {
// Cases are tried in source order regardless of where default appears:
// the first column is padded, the last column is written unpadded
// followed by the newline, and the rest take the default branch.
switch i {
case 0:
fmt.Fprintf(w, "%-*s", max[i], s)
default:
fmt.Fprintf(w, " %-*s", max[i], s)
case len(row.cols) - 1:
fmt.Fprintf(w, " %s\n", s)
}
}
// data
for _, row := range table[1:] {
for i, s := range row.cols {
switch i {
case 0:
fmt.Fprintf(w, "%-*s", max[i], s)
default:
if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' {
// Left-align p value.
fmt.Fprintf(w, " %s", s)
// This break leaves the switch, not the enclosing for loop.
break
}
fmt.Fprintf(w, " %*s", max[i], s)
}
}
fmt.Fprintf(w, "\n")
}
}
}
// addGeomean appends a "[Geo mean]" row to table and returns the result.
//
// For each configuration in c the row gets the geometric mean of the means
// of every benchmark that has data for unit, or an empty cell when the
// configuration has none. When delta is true and every configuration had
// data, a final cell holds the percent change from the first geometric mean
// to the second.
func addGeomean(table []*row, c *Collection, unit string, delta bool) []*row {
	summary := newRow("[Geo mean]")
	var geomeans []float64
	for _, config := range c.Configs {
		var means []float64
		for _, bench := range c.Benchmarks {
			if stat := c.Stats[BenchKey{Config: config, Benchmark: bench, Unit: unit}]; stat != nil {
				means = append(means, stat.Mean)
			}
		}
		if len(means) == 0 {
			// No data for this configuration: leave the cell blank and give
			// up on the delta, since geomeans no longer lines up with Configs.
			summary.add("")
			delta = false
			continue
		}
		gm := stats.GeoMean(means)
		geomeans = append(geomeans, gm)
		summary.add(newScaler(gm, unit)(gm) + " ")
	}
	if delta {
		change := ((geomeans[1] / geomeans[0]) - 1.0) * 100.0
		summary.add(fmt.Sprintf("%+.2f%%", change))
	}
	return append(table, summary)
}
// timeScaler returns a formatter for durations given in nanoseconds.
//
// The reference value ns selects both the unit (s, ms, µs, or ns) and the
// number of decimal places, so that a value near ns is shown with about
// three significant digits. The returned function applies that same unit
// and precision to every value passed to it.
func timeScaler(ns float64) func(float64) string {
	// Thresholds are in seconds, largest first; the first one the
	// reference value reaches decides the format.
	steps := []struct {
		min    float64
		format string
		scale  float64
	}{
		{99.5, "%.0fs", 1},
		{9.95, "%.1fs", 1},
		{0.995, "%.2fs", 1},
		{0.0995, "%.0fms", 1000},
		{0.00995, "%.1fms", 1000},
		{0.000995, "%.2fms", 1000},
		{0.0000995, "%.0fµs", 1000 * 1000},
		{0.00000995, "%.1fµs", 1000 * 1000},
		{0.000000995, "%.2fµs", 1000 * 1000},
		{0.0000000995, "%.0fns", 1000 * 1000 * 1000},
		{0.00000000995, "%.1fns", 1000 * 1000 * 1000},
	}

	// Anything below the smallest threshold is shown in ns with two decimals.
	format, scale := "%.2fns", float64(1000*1000*1000)
	secs := ns / 1e9
	for _, st := range steps {
		if secs >= st.min {
			format, scale = st.format, st.scale
			break
		}
	}

	return func(v float64) string {
		return fmt.Sprintf(format, v/1e9*scale)
	}
}
// newScaler returns a formatter for values measured in unit, using val as
// the reference value that fixes the SI prefix and number of decimals.
//
// "ns/op" is delegated to timeScaler. For every other unit the result is a
// number with an optional k/M/G/T prefix; "B/op" additionally gets a "B"
// suffix and "MB/s" a "B/s" suffix. "MB/s" values are multiplied by 1e6
// before the prefix is chosen, so the prefix describes bytes per second.
func newScaler(val float64, unit string) func(float64) string {
	if unit == "ns/op" {
		return timeScaler(val)
	}

	prescale := 1.0
	if unit == "MB/s" {
		prescale = 1e6
	}

	// Thresholds are listed largest first; the first one the prescaled
	// reference value reaches decides format, divisor, and prefix.
	tiers := []struct {
		min    float64
		format string
		scale  float64
		suffix string
	}{
		{99500000000000, "%.0f", 1e12, "T"},
		{9950000000000, "%.1f", 1e12, "T"},
		{995000000000, "%.2f", 1e12, "T"},
		{99500000000, "%.0f", 1e9, "G"},
		{9950000000, "%.1f", 1e9, "G"},
		{995000000, "%.2f", 1e9, "G"},
		{99500000, "%.0f", 1e6, "M"},
		{9950000, "%.1f", 1e6, "M"},
		{995000, "%.2f", 1e6, "M"},
		{99500, "%.0f", 1e3, "k"},
		{9950, "%.1f", 1e3, "k"},
		{995, "%.2f", 1e3, "k"},
		{99.5, "%.0f", 1, ""},
		{9.95, "%.1f", 1, ""},
	}

	format, scale, suffix := "%.2f", 1.0, ""
	x := val * prescale
	for _, tier := range tiers {
		if x >= tier.min {
			format, scale, suffix = tier.format, tier.scale, tier.suffix
			break
		}
	}

	switch unit {
	case "B/op":
		suffix += "B"
	case "MB/s":
		suffix += "B/s"
	}

	// Fold the prescale into the divisor so callers keep passing raw values.
	scale /= prescale
	layout := format + suffix
	return func(v float64) string {
		return fmt.Sprintf(layout, v/scale)
	}
}
// Format renders b as the scaled mean followed by "±N%", where N is the
// larger relative distance of Min or Max from the mean. When the mean is
// zero no percentage is shown.
func (b *Benchstat) Format(scaler func(float64) string) string {
	mean := scaler(b.Mean)
	if b.Mean == 0 {
		return mean + " "
	}

	spread := 1 - b.Min/b.Mean
	if above := b.Max/b.Mean - 1; above > spread {
		spread = above
	}
	pct := fmt.Sprintf("%.0f%%", spread*100.0)
	return fmt.Sprintf("%s ±%3s", mean, pct)
}
// ComputeStats updates the derived statistics in stat from the raw
// samples in stat.Values.
//
// Samples lying more than 1.5 interquartile ranges below the first quartile
// or above the third quartile are treated as outliers and left out of
// RValues. Min, Max, and Mean are then computed over RValues.
//
// RValues is rebuilt from scratch on every call, so calling ComputeStats
// again (for example after more samples were appended to Values) does not
// accumulate duplicate samples.
func (stat *Benchstat) ComputeStats() {
	// Discard outliers.
	values := stats.Sample{Xs: stat.Values}
	q1, q3 := values.Percentile(0.25), values.Percentile(0.75)
	lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1)
	// Start from an empty slice; appending to the previous RValues would
	// duplicate samples and skew Mean on a repeated call.
	stat.RValues = nil
	for _, value := range stat.Values {
		if lo <= value && value <= hi {
			stat.RValues = append(stat.RValues, value)
		}
	}

	// Compute statistics of remaining data.
	stat.Min, stat.Max = stats.Bounds(stat.RValues)
	stat.Mean = stats.Mean(stat.RValues)
}
// A Benchstat is the metrics along one axis (e.g., ns/op or MB/s)
// for all runs of a specific benchmark.
//
// Values holds the raw samples; RValues, Min, Mean, and Max are derived
// from Values by ComputeStats.
type Benchstat struct {
Unit string
Values []float64 // metrics
RValues []float64 // metrics with outliers removed
Min float64 // min of RValues
Mean float64 // mean of RValues
Max float64 // max of RValues
}
// A BenchKey identifies one metric (e.g., "ns/op", "B/op") from one
// benchmark (function name sans "Benchmark" prefix) in one
// configuration (input file name).
//
// Note that readGroup does not use file names: it labels each group of
// results "#0", "#1", ... in the order the groups are read.
type BenchKey struct {
Config, Benchmark, Unit string
}
// A Collection holds the Benchstat for every (config, benchmark, unit)
// combination that has been added, together with the distinct key
// components in first-seen order.
type Collection struct {
// Stats maps each key to its accumulated statistics.
Stats map[BenchKey]*Benchstat
// Configs, Benchmarks, and Units give the set of configs,
// benchmarks, and units from the keys in Stats in an order
// meant to match the order the benchmarks were read in.
Configs, Benchmarks, Units []string
}
// AddStat returns the Benchstat stored in c under key, creating and
// registering an empty one if none exists yet.
//
// When a new entry is created, key's config, benchmark, and unit are
// appended to c.Configs, c.Benchmarks, and c.Units unless already present,
// which keeps those slices in first-seen order. A zero-value Collection is
// usable: the Stats map is allocated on first use.
func (c *Collection) AddStat(key BenchKey) *Benchstat {
	if stat, ok := c.Stats[key]; ok {
		return stat
	}
	if c.Stats == nil {
		// Reading a nil map is fine, but the store below would panic.
		c.Stats = make(map[BenchKey]*Benchstat)
	}

	// addString appends add to *list unless it is already there.
	addString := func(list *[]string, add string) {
		for _, s := range *list {
			if s == add {
				return
			}
		}
		*list = append(*list, add)
	}
	addString(&c.Configs, key.Config)
	addString(&c.Benchmarks, key.Benchmark)
	addString(&c.Units, key.Unit)

	stat := &Benchstat{Unit: key.Unit}
	c.Stats[key] = stat
	return stat
}
// readResults builds a Collection from results, feeding each inner slice
// to readGroup as one configuration, in order.
func readResults(results [][]*benchfmt.Result) *Collection {
	c := &Collection{Stats: make(map[BenchKey]*Benchstat)}
	for i := range results {
		readGroup(results[i], c)
	}
	return c
}
// readGroup adds one group of benchmark results to c as a new
// configuration named "#N", where N is the number of configurations
// already in c.
//
// Only result lines that look like "BenchmarkName iterations value unit
// [value unit ...]" contribute: lines with fewer than four fields, a first
// field without the "Benchmark" prefix, or an iteration count that is zero
// or unparsable are skipped, as is any value that does not parse as a
// float.
func readGroup(group []*benchfmt.Result, c *Collection) {
	// TODO(quentin): Pick a better name for each group
	config := fmt.Sprintf("#%d", len(c.Configs))
	c.Configs = append(c.Configs, config)

	for _, result := range group {
		fields := strings.Fields(result.Content)
		if len(fields) < 4 || !strings.HasPrefix(fields[0], "Benchmark") {
			continue
		}
		// A failed parse yields 0, which is rejected just like a literal 0.
		if iters, _ := strconv.Atoi(fields[1]); iters == 0 {
			continue
		}
		bench := strings.TrimPrefix(fields[0], "Benchmark")

		// After name and iteration count, fields come in (value, unit)
		// pairs; u indexes the unit of each complete pair.
		for u := 3; u < len(fields); u += 2 {
			val, err := strconv.ParseFloat(fields[u-1], 64)
			if err != nil {
				continue
			}
			stat := c.AddStat(BenchKey{Config: config, Benchmark: bench, Unit: fields[u]})
			stat.Values = append(stat.Values, val)
		}
	}
}
// metricOf returns the column heading used for unit: a friendlier name for
// the units it knows about, and unit itself for anything else.
func metricOf(unit string) string {
	known := map[string]string{
		"ns/op": "time/op",
		"B/op":  "alloc/op",
		"MB/s":  "speed",
	}
	if name, ok := known[unit]; ok {
		return name
	}
	return unit
}
// Significance tests.
// Notest is a DeltaTest that performs no significance test. It always
// reports a p-value of -1 and a nil error.
func Notest(old, new *Benchstat) (pval float64, err error) {
	pval = -1
	return pval, nil
}
// Ttest is a DeltaTest that runs a two-sample Welch t-test on the
// outlier-free samples of old and new. It returns the test's p-value, or
// -1 together with the error if the test cannot be performed.
func Ttest(old, new *Benchstat) (pval float64, err error) {
	before := stats.Sample{Xs: old.RValues}
	after := stats.Sample{Xs: new.RValues}
	result, err := stats.TwoSampleWelchTTest(before, after, stats.LocationDiffers)
	if err == nil {
		return result.P, nil
	}
	return -1, err
}
// Utest is a DeltaTest that runs a Mann-Whitney U-test on the outlier-free
// samples of old and new. It returns the test's p-value, or -1 together
// with the error if the test cannot be performed.
func Utest(old, new *Benchstat) (pval float64, err error) {
	result, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
	if err == nil {
		return result.P, nil
	}
	return -1, err
}
...@@ -11,22 +11,20 @@ import ( ...@@ -11,22 +11,20 @@ import (
) )
var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(` var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(`
{{- if . -}}
{{with index . 0}} {{with index . 0}}
<table class='benchstat {{if .OldNewDelta}}oldnew{{end}}'> <table class='benchstat {{if .OldNewDelta}}oldnew{{end}}'>
{{if .OldNewDelta -}} {{if eq (len .Configs) 1}}
{{- else if eq (len .Configs) 1}}
{{- else -}} {{- else -}}
<tr class='configs'><th>{{range .Configs}}<th>{{.}}{{end}} <tr class='configs'><th>{{range .Configs}}<th>{{.}}{{end}}
{{end}} {{end}}
{{end}} {{end}}
{{- range $i, $table := .}} {{- range $i, $table := .}}
<tbody> <tbody>
{{if .OldNewDelta -}} {{if eq (len .Configs) 1}}
<tr><th><th>old {{.Metric}}<th>new {{.Metric}}<th>delta<th>
{{else if eq (len .Configs) 1}}
<tr><th><th>{{.Metric}} <tr><th><th>{{.Metric}}
{{else -}} {{else -}}
<tr><th><th colspan='{{len .Configs}}' class='metric'>{{.Metric}} <tr><th><th colspan='{{len .Configs}}' class='metric'>{{.Metric}}{{if .OldNewDelta}}<th>delta{{end}}
{{end}}{{range $row := $table.Rows -}} {{end}}{{range $row := $table.Rows -}}
{{if $table.OldNewDelta -}} {{if $table.OldNewDelta -}}
<tr class='{{if eq .Change 1}}better{{else if eq .Change -1}}worse{{else}}unchanged{{end}}'> <tr class='{{if eq .Change 1}}better{{else if eq .Change -1}}worse{{else}}unchanged{{end}}'>
...@@ -39,6 +37,7 @@ var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(` ...@@ -39,6 +37,7 @@ var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(`
</tbody> </tbody>
{{end}} {{end}}
</table> </table>
{{end -}}
`)) `))
var htmlFuncs = template.FuncMap{ var htmlFuncs = template.FuncMap{
......
...@@ -76,7 +76,7 @@ func (c *Collection) Tables() []*Table { ...@@ -76,7 +76,7 @@ func (c *Collection) Tables() []*Table {
new := c.Metrics[k1] new := c.Metrics[k1]
// If one is missing, omit row entirely. // If one is missing, omit row entirely.
// TODO: Control this better. // TODO: Control this better.
if old == new || new == nil { if old == nil || new == nil {
continue continue
} }
pval, testerr := deltaTest(old, new) pval, testerr := deltaTest(old, new)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment