Commit 35ee1bd0 authored by Russ Cox

benchstat: split significance tests into delta.go

Change-Id: Ied07ec9cec690a1927e9e32fc8d47f36521f658f
Reviewed-on: https://go-review.googlesource.com/35935
Reviewed-by: Quentin Smith <quentin@golang.org>
parent 83910fa1
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Significance tests.
package main
import (
"errors"
"golang.org/x/perf/internal/stats"
)
// A DeltaTest compares the old and new metrics and returns the
// expected probability (the p-value) that they are drawn from the
// same distribution.
//
// If a probability cannot be computed, the DeltaTest returns an
// error explaining why. Common errors include ErrSamplesEqual
// (all samples are equal), ErrSampleSize (there aren't enough samples),
// and ErrZeroVariance (the sample has zero variance).
// TTest and UTest return -1 as the probability alongside such an error.
//
// As a special case, the missing test NoDeltaTest returns -1, nil.
type DeltaTest func(old, new *Metrics) (float64, error)
// Sentinel errors a DeltaTest returns when it cannot compute a probability.
var (
	// ErrSampleSize reports that there aren't enough samples.
	ErrSampleSize = errors.New("too few samples")

	// ErrSamplesEqual reports that all samples are equal.
	ErrSamplesEqual = errors.New("all equal")

	// ErrZeroVariance reports that the sample has zero variance.
	ErrZeroVariance = errors.New("zero variance")
)
// NoDeltaTest is the DeltaTest that performs no test at all.
// It ignores both arguments and always returns -1, nil.
func NoDeltaTest(old, new *Metrics) (pval float64, err error) {
	// -1 is the placeholder probability reported when no test is run.
	const noPValue = -1
	return noPValue, nil
}
// TTest is a DeltaTest using the two-sample Welch t-test.
//
// It runs the test on old.RValues and new.RValues and returns the
// resulting p-value. If the test fails, it returns -1 and the error
// as translated by convertErr.
func TTest(old, new *Metrics) (pval float64, err error) {
	oldSample := stats.Sample{Xs: old.RValues}
	newSample := stats.Sample{Xs: new.RValues}

	result, err := stats.TwoSampleWelchTTest(oldSample, newSample, stats.LocationDiffers)
	if err == nil {
		return result.P, nil
	}
	return -1, convertErr(err)
}
// UTest is a DeltaTest using the Mann-Whitney U test.
//
// It runs the test on old.RValues and new.RValues and returns the
// resulting p-value. If the test fails, it returns -1 and the error
// as translated by convertErr.
func UTest(old, new *Metrics) (pval float64, err error) {
	result, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
	if err == nil {
		return result.P, nil
	}
	return -1, convertErr(err)
}
// convertErr translates the stats package's internal sentinel errors
// into the errors exported by this package, which are the ones a
// DeltaTest is expected to return.
//
// Keeping a separate set of errors lets clients use package benchstat
// without access to the internal stats package, and gives us a chance
// to use shorter error messages.
//
// Any error that is not one of the three known stats sentinels
// (including nil) is returned unchanged.
func convertErr(err error) error {
	if err == stats.ErrZeroVariance {
		return ErrZeroVariance
	}
	if err == stats.ErrSampleSize {
		return ErrSampleSize
	}
	if err == stats.ErrSamplesEqual {
		return ErrSamplesEqual
	}
	return err
}
......@@ -119,14 +119,14 @@ var (
flagHTML = flag.Bool("html", false, "print results as an HTML table")
)
var deltaTestNames = map[string]func(old, new *Metrics) (float64, error){
"none": notest,
"u": utest,
"u-test": utest,
"utest": utest,
"t": ttest,
"t-test": ttest,
"ttest": ttest,
// deltaTestNames maps a test name to the DeltaTest it selects.
// Several spellings map to the same test: "u", "u-test" and "utest"
// select UTest; "t", "t-test" and "ttest" select TTest; "none"
// selects NoDeltaTest.
var deltaTestNames = map[string]DeltaTest{
"none": NoDeltaTest,
"u": UTest,
"u-test": UTest,
"utest": UTest,
"t": TTest,
"t-test": TTest,
"ttest": TTest,
}
type row struct {
......@@ -187,13 +187,7 @@ func main() {
scaler := NewScaler(old.Mean, old.Unit)
row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~ ")
if testerr == stats.ErrZeroVariance {
row.add("(zero variance)")
} else if testerr == stats.ErrSampleSize {
row.add("(too few samples)")
} else if testerr == stats.ErrSamplesEqual {
row.add("(all equal)")
} else if testerr != nil {
if testerr != nil {
row.add(fmt.Sprintf("(%s)", testerr))
} else if pval < *flagAlpha {
row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0)
......@@ -499,25 +493,3 @@ func metricOf(unit string) string {
return unit
}
}
// Significance tests.

// notest performs no significance test; it ignores its arguments
// and always returns -1, nil.
func notest(old, new *Metrics) (pval float64, err error) {
return -1, nil
}
// ttest calls stats.TwoSampleWelchTTest on old.RValues and new.RValues
// and returns the result's P field. On failure it returns -1 and the
// stats package's error unchanged.
func ttest(old, new *Metrics) (pval float64, err error) {
t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers)
if err != nil {
return -1, err
}
return t.P, nil
}
// utest calls stats.MannWhitneyUTest on old.RValues and new.RValues
// and returns the result's P field. On failure it returns -1 and the
// stats package's error unchanged.
func utest(old, new *Metrics) (pval float64, err error) {
u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
if err != nil {
return -1, err
}
return u.P, nil
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment