Commit ad98d977 authored by Quentin Smith

analysis/app: add trend graphing

This is still not feature complete, so the page is not linked from
anywhere. It supports graphing benchmark results that match any
perf query. One line is shown per matching benchmark name. Demo
available at:

https://quentin.perf.golang.org/trend?q=upload:20170127.25

Big TODO tasks:
- improve display of multiple benchmarks (multiple graphs?
  click-to-select? additional user controls?)
- correlate benchmarks across uploads (currently all uploads are
  assumed to be from identical machines)
- optimize/cache queries (6 months of a single benchmark loads in 12s;
  6 months of all benchmarks times out at >60s)

Change-Id: I7d1f6073c4837ef63205a10dd4e18085fe7d00ac
Reviewed-on: https://go-review.googlesource.com/36612
Run-TryBot: Quentin Smith <quentin@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent f5907089
@@ -23,6 +23,7 @@ func (a *App) RegisterOnMux(mux *http.ServeMux) {
mux.HandleFunc("/", a.index)
mux.HandleFunc("/search", a.search)
mux.HandleFunc("/compare", a.compare)
mux.HandleFunc("/trend", a.trend)
}
// search handles /search.
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package app
import "math"
// TODO: This all assumes that data is sampled at a regular interval
// and there are no missing values. It could be generalized to accept
// missing values (perhaps represented by NaN), or generalized much
// further by accepting (t, x) pairs and a vector of times at which to
// evaluate the filter (and an arbitrary window size). I would have to
// figure out how that affects the difference array in KZA.
// TODO: These can generate a lot of garbage. Perhaps the caller
// should pass in the target slice? Or these should just overwrite the
// input array and leave it to the caller to copy if necessary?
// MovingAverage performs a moving average (MA) filter of xs with
// window size m. m must be a positive odd integer.
//
// Note that this filter is often described in terms of the half
// length of the window, (m-1)/2.
func MovingAverage(xs []float64, m int) []float64 {
if m <= 0 || m%2 != 1 {
panic("m must be a positive, odd integer")
}
ys := make([]float64, len(xs))
sum, n := 0.0, 0
for l, i, r := -m, -(m-1)/2, 0; i < len(ys); l, i, r = l+1, i+1, r+1 {
if l >= 0 {
sum -= xs[l]
n--
}
if r < len(xs) {
sum += xs[r]
n++
}
if i >= 0 {
ys[i] = sum / float64(n)
}
}
return ys
}
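For illustration only (this fragment is not part of the change and the values are made up): with m=3 each output is the mean of a point and its immediate neighbors, and the window shrinks at the ends of the slice.

	// Illustrative in-package fragment, e.g. inside a test.
	xs := []float64{1, 2, 3, 4, 5}
	ys := MovingAverage(xs, 3)
	// ys == []float64{1.5, 2, 3, 4, 4.5}; ys[0] averages only xs[0] and xs[1].
	_ = ys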
// KolmogorovZurbenko performs a Kolmogorov-Zurbenko (KZ) filter of xs
// with window size m and k iterations. m must be a positive odd
// integer. k must be positive.
func KolmogorovZurbenko(xs []float64, m, k int) []float64 {
// k is typically small, and MA is quite efficient, so just do
// the iterated moving average rather than bothering to
// compute the binomial coefficient kernel.
for i := 0; i < k; i++ {
// TODO: Generate less garbage.
xs = MovingAverage(xs, m)
}
return xs
}
// AdaptiveKolmogorovZurbenko performs an adaptive Kolmogorov-Zurbenko
// (KZA) filter of xs using an initial window size m and k iterations.
// m must be a positive odd integer. k must be positive.
//
// See Zurbenko, et al. 1996: Detecting discontinuities in time series
// of upper air data: Demonstration of an adaptive filter technique.
// Journal of Climate, 9, 3548–3560.
func AdaptiveKolmogorovZurbenko(xs []float64, m, k int) []float64 {
// Perform initial KZ filter.
z := KolmogorovZurbenko(xs, m, k)
// Compute differenced values.
q := (m - 1) / 2
d := make([]float64, len(z)+1)
maxD := 0.0
for i := q; i < len(z)-q; i++ {
d[i] = math.Abs(z[i+q] - z[i-q])
if d[i] > maxD {
maxD = d[i]
}
}
if maxD == 0 {
// xs is constant, so no amount of filtering will do
// anything. Avoid dividing 0/0 below.
return xs
}
// Compute adaptive filter.
ys := make([]float64, len(xs))
for t := range ys {
dPrime := d[t+1] - d[t]
f := 1 - d[t]/maxD
qt := q
if dPrime <= 0 {
// Zurbenko doesn't specify what to do with
// the fractional part of qt and qh, so we
// interpret this as summing all points of xs
// between qt and qh.
qt = int(math.Ceil(float64(q) * f))
}
if t-qt < 0 {
qt = t
}
qh := q
if dPrime >= 0 {
qh = int(math.Floor(float64(q) * f))
}
if t+qh >= len(xs) {
qh = len(xs) - t - 1
}
sum := 0.0
for i := t - qt; i <= t+qh; i++ {
sum += xs[i]
}
// Zurbenko divides by qh+qt, but this undercounts the
// number of terms in the sum by 1.
ys[t] = sum / float64(qh+qt+1)
}
return ys
}
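As a rough in-package sketch (hypothetical data, not part of the change): the plain KZ filter smooths a series uniformly, while the adaptive KZA filter narrows its window near sharp level shifts so discontinuities are not smeared out.

	// Illustrative fragment: a hypothetical series with a step at index 50.
	noisy := make([]float64, 100)
	for i := 50; i < 100; i++ {
		noisy[i] = 1
	}
	smooth := KolmogorovZurbenko(noisy, 15, 3)        // spreads the step over roughly k*(m-1) points
	sharp := AdaptiveKolmogorovZurbenko(noisy, 15, 3) // keeps the step much sharper
	_, _ = smooth, sharp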
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package app
import (
"math/rand"
"testing"
)
// Aeq returns true if expect and got are equal to 8 significant
// figures (1 part in 100 million).
func Aeq(expect, got float64) bool {
if expect < 0 && got < 0 {
expect, got = -expect, -got
}
return expect*0.99999999 <= got && got*0.99999999 <= expect
}
func TestMovingAverage(t *testing.T) {
// Test MovingAverage against the obvious (but slow)
// implementation.
xs := make([]float64, 100)
for iter := 0; iter < 10; iter++ {
for i := range xs {
xs[i] = rand.Float64()
}
m := 1 + 2*rand.Intn(100)
ys1, ys2 := MovingAverage(xs, m), slowMovingAverage(xs, m)
// TODO: Use stuff from mathtest.
for i, y1 := range ys1 {
if !Aeq(y1, ys2[i]) {
t.Fatalf("want %v, got %v", ys2, ys1)
}
}
}
}
func slowMovingAverage(xs []float64, m int) []float64 {
ys := make([]float64, len(xs))
for i := range ys {
psum, n := 0.0, 0
for j := i - (m-1)/2; j <= i+(m-1)/2; j++ {
if 0 <= j && j < len(xs) {
psum += xs[j]
n++
}
}
ys[i] = psum / float64(n)
}
return ys
}
This diff is collapsed.
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package app
import (
"testing"
"github.com/aclements/go-gg/table"
"golang.org/x/perf/internal/diff"
)
func TestTableToJS(t *testing.T) {
in := table.TableFromStrings(
[]string{"text", "num"},
[][]string{
{"hello", "15.1"},
{"world", "20"},
}, true)
have := tableToJS(in, []column{{Name: "text"}, {Name: "num"}})
want := `{cols: [{"id":"text","type":"string","label":"text"},
{"id":"num","type":"number","label":"num"}],
rows: [{c:[{v: "hello"}, {v: 15.1}]},
{c:[{v: "world"}, {v: 20}]}]}`
if d := diff.Diff(string(have), want); d != "" {
t.Errorf("tableToJS returned wrong JS (- have, + want):\n%s", d)
}
}
<!DOCTYPE html>
<html>
<head>
<title>Performance Result Comparison</title>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<style type="text/css">
#header h1 {
display: inline;
}
#search {
padding: 1em .5em;
width: 100%;
}
input[type="text"] {
font-size: 100%;
}
#results {
border-top: 1px solid black;
}
</style>
</head>
<body>
<div id="header">
<h1>Go Performance Dashboard</h1>
<a href="/">about</a>
</div>
<div id="search">
<form action="/trend">
<input type="text" name="q" value="{{.Q}}" size="120">
<input type="submit" value="Search">
</form>
</div>
<div id="results">
{{if not .Q}}
<h2>Recent Uploads</h2>
<table>
<tr><th>Upload ID</th><th>trend</th></tr>
{{range .TrendUploads}}
<tr><td><a href="/trend?q=upload:{{.UploadID}}">{{.UploadID}}</a></td><td>{{.LabelValues.trend}}</td></tr>
{{end}}
</table>
{{else}}
{{with .Error}}
<p>{{.}}</p>
{{else}}
<div id="chart" style="height: 600px"></div>
<script type="text/javascript">
google.charts.load('current', {'packages':['corechart']});
google.charts.setOnLoadCallback(draw);
function draw() {
var dt = new google.visualization.DataTable({{.PlotData}});
var options = {
title: 'Benchmark Trend',
hAxis: {
title: 'commit index',
},
vAxis: {
title: 'normalized ns/op',
},
explorer: {
actions: ['dragToZoom', 'rightClickToReset'],
maxZoomIn: 0.05,
},
};
var chart = new google.visualization.LineChart(document.getElementById('chart'));
chart.draw(dt, options);
}
</script>
{{end}}
{{end}}
</div>
</body>
</html>
Copyright (c) 2016 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package generic provides type-generic functions.
package generic
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package generic
import "reflect"
type TypeError struct {
Type1, Type2 reflect.Type
Extra string
}
func (e TypeError) Error() string {
msg := e.Type1.String()
if e.Type2 != nil {
msg += " and " + e.Type2.String()
}
msg += " " + e.Extra
return msg
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package generic
import "reflect"
// CanOrder returns whether the values a and b are orderable according
// to the Go language specification.
func CanOrder(a, b interface{}) bool {
ak, bk := reflect.ValueOf(a).Kind(), reflect.ValueOf(b).Kind()
if ak != bk {
return false
}
return CanOrderR(ak)
}
var orderable = map[reflect.Kind]bool{
reflect.Int: true,
reflect.Int8: true,
reflect.Int16: true,
reflect.Int32: true,
reflect.Int64: true,
reflect.Uint: true,
reflect.Uintptr: true,
reflect.Uint8: true,
reflect.Uint16: true,
reflect.Uint32: true,
reflect.Uint64: true,
reflect.Float32: true,
reflect.Float64: true,
reflect.String: true,
}
// CanOrderR returns whether two values of kind k are orderable
// according to the Go language specification.
func CanOrderR(k reflect.Kind) bool {
return orderable[k]
}
// Order returns the order of values a and b: -1 if a < b, 0 if a ==
// b, 1 if a > b. The results are undefined if either a or b is NaN.
//
// Order panics if a and b are not orderable according to the Go
// language specification.
func Order(a, b interface{}) int {
return OrderR(reflect.ValueOf(a), reflect.ValueOf(b))
}
// OrderR is equivalent to Order, but operates on reflect.Values.
func OrderR(a, b reflect.Value) int {
if a.Kind() != b.Kind() {
panic(&TypeError{a.Type(), b.Type(), "are not orderable because they are different kinds"})
}
switch a.Kind() {
case reflect.Float32, reflect.Float64:
a, b := a.Float(), b.Float()
if a < b {
return -1
} else if a > b {
return 1
}
return 0
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
a, b := a.Int(), b.Int()
if a < b {
return -1
} else if a > b {
return 1
}
return 0
case reflect.Uint, reflect.Uintptr, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
a, b := a.Uint(), b.Uint()
if a < b {
return -1
} else if a > b {
return 1
}
return 0
case reflect.String:
a, b := a.String(), b.String()
if a < b {
return -1
} else if a > b {
return 1
}
return 0
}
panic(&TypeError{a.Type(), nil, "is not orderable"})
}
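For illustration (not part of the change), a minimal standalone sketch against the upstream import path of this vendored package:

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic"
	)

	func main() {
		fmt.Println(generic.Order(1, 2))      // -1
		fmt.Println(generic.Order("b", "a"))  // 1
		fmt.Println(generic.CanOrder(1, "a")) // false: different kinds
	}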
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"github.com/aclements/go-gg/generic"
)
// Concat returns the concatenation of all of ss. The types of all of
// the arguments must be identical or Concat will panic with a
// *generic.TypeError. The returned slice will have the same type as the
// inputs. If there are 0 arguments, Concat returns nil. Concat does
// not modify any of the input slices.
func Concat(ss ...T) T {
if len(ss) == 0 {
return nil
}
rvs := make([]reflect.Value, len(ss))
total := 0
var typ reflect.Type
for i, s := range ss {
rvs[i] = reflectSlice(s)
total += rvs[i].Len()
if i == 0 {
typ = rvs[i].Type()
} else if rvs[i].Type() != typ {
panic(&generic.TypeError{typ, rvs[i].Type(), "have different types"})
}
}
out := reflect.MakeSlice(typ, 0, total)
for _, rv := range rvs {
out = reflect.AppendSlice(out, rv)
}
return out.Interface()
}
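A minimal illustrative use of Concat (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		fmt.Println(slice.Concat([]int{1, 2}, []int{3})) // [1 2 3]
	}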
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"github.com/aclements/go-gg/generic"
)
// Convert converts each element in from and assigns it to *to. to
// must be a pointer to a slice. Convert slices or extends *to to
// len(from) and then assigns to[i] = T(from[i]) where T is the type
// of *to's elements. If from and *to have the same element type, it
// simply assigns *to = from.
func Convert(to interface{}, from T) {
fv := reflectSlice(from)
tv := reflect.ValueOf(to)
if tv.Kind() != reflect.Ptr {
panic(&generic.TypeError{tv.Type(), nil, "is not a *[]T"})
}
tst := tv.Type().Elem()
if tst.Kind() != reflect.Slice {
panic(&generic.TypeError{tv.Type(), nil, "is not a *[]T"})
}
if fv.Type().AssignableTo(tst) {
tv.Elem().Set(fv)
return
}
eltt := tst.Elem()
if !fv.Type().Elem().ConvertibleTo(eltt) {
panic(&generic.TypeError{fv.Type(), tst, "cannot be converted"})
}
switch to := to.(type) {
case *[]float64:
// This is extremely common.
*to = (*to)[:0]
for i, len := 0, fv.Len(); i < len; i++ {
*to = append(*to, fv.Index(i).Convert(eltt).Float())
}
default:
tsv := tv.Elem()
tsv.SetLen(0)
for i, len := 0, fv.Len(); i < len; i++ {
tsv = reflect.Append(tsv, fv.Index(i).Convert(eltt))
}
tv.Elem().Set(tsv)
}
}
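A minimal illustrative use of Convert (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		// Convert an []int into a []float64 via a pointer to the destination.
		var dst []float64
		slice.Convert(&dst, []int{1, 2, 3})
		fmt.Println(dst) // [1 2 3]
	}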
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import "reflect"
// Cycle constructs a slice of length length by repeatedly
// concatenating s to itself. If len(s) >= length, it returns
// s[:length]. Otherwise, it allocates a new slice. If len(s) == 0 and
// length != 0, Cycle panics.
func Cycle(s T, length int) T {
rv := reflectSlice(s)
if rv.Len() >= length {
return rv.Slice(0, length).Interface()
}
if rv.Len() == 0 {
panic("empty slice")
}
// Allocate a new slice of the appropriate length.
out := reflect.MakeSlice(rv.Type(), length, length)
// Copy elements to out.
for pos := 0; pos < length; {
pos += reflect.Copy(out.Slice(pos, length), rv)
}
return out.Interface()
}
// Repeat returns a slice consisting of length copies of v.
func Repeat(v interface{}, length int) T {
if length < 0 {
length = 0
}
rv := reflect.ValueOf(v)
out := reflect.MakeSlice(reflect.SliceOf(rv.Type()), length, length)
for i := 0; i < length; i++ {
out.Index(i).Set(rv)
}
return out.Interface()
}
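A minimal illustrative use of Cycle and Repeat (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		fmt.Println(slice.Cycle([]int{1, 2, 3}, 5)) // [1 2 3 1 2]
		fmt.Println(slice.Repeat("x", 3))           // [x x x]
	}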
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package slice provides generic slice functions.
package slice
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"github.com/aclements/go-gg/generic"
)
// Index returns the index of the first instance of val in s, or -1 if
// val is not present in s. val's type must be s's element type.
func Index(s T, val interface{}) int {
rs := reflectSlice(s)
if vt := reflect.TypeOf(val); rs.Type().Elem() != vt {
// TODO: Better "<seq> is not a sequence of <val>".
panic(&generic.TypeError{rs.Type(), vt, "cannot find"})
}
for i, l := 0, rs.Len(); i < l; i++ {
if rs.Index(i).Interface() == val {
return i
}
}
return -1
}
// LastIndex returns the index of the last instance of val in s, or -1
// if val is not present in s. val's type must be s's element type.
func LastIndex(s T, val interface{}) int {
rs := reflectSlice(s)
if vt := reflect.TypeOf(val); rs.Type().Elem() != vt {
// TODO: Better "<seq> is not a sequence of <val>".
panic(&generic.TypeError{rs.Type(), vt, "cannot find"})
}
for i := rs.Len() - 1; i >= 0; i-- {
if rs.Index(i).Interface() == val {
return i
}
}
return -1
}
// Contains reports whether val is within s. val's type must be s's
// element type.
func Contains(s T, val interface{}) bool {
return Index(s, val) >= 0
}
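A minimal illustrative use of Index, LastIndex, and Contains (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		s := []string{"a", "b", "a"}
		fmt.Println(slice.Index(s, "a"))     // 0
		fmt.Println(slice.LastIndex(s, "a")) // 2
		fmt.Println(slice.Contains(s, "c"))  // false
	}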
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"github.com/aclements/go-gg/generic"
)
// Select returns a slice w such that w[i] = v[indexes[i]].
func Select(v T, indexes []int) T {
switch v := v.(type) {
case []int:
res := make([]int, len(indexes))
for i, x := range indexes {
res[i] = v[x]
}
return res
case []float64:
res := make([]float64, len(indexes))
for i, x := range indexes {
res[i] = v[x]
}
return res
case []string:
res := make([]string, len(indexes))
for i, x := range indexes {
res[i] = v[x]
}
return res
}
rv := reflectSlice(v)
res := reflect.MakeSlice(rv.Type(), len(indexes), len(indexes))
for i, x := range indexes {
res.Index(i).Set(rv.Index(x))
}
return res.Interface()
}
// SelectInto assigns out[i] = in[indexes[i]]. in and out must have
// the same types and len(out) must be >= len(indexes). If in and out
// overlap, the results are undefined.
func SelectInto(out, in T, indexes []int) {
// TODO: Maybe they should only have to be assignable?
if it, ot := reflect.TypeOf(in), reflect.TypeOf(out); it != ot {
panic(&generic.TypeError{it, ot, "must be the same type"})
}
switch in := in.(type) {
case []int:
out := out.([]int)
for i, x := range indexes {
out[i] = in[x]
}
return
case []float64:
out := out.([]float64)
for i, x := range indexes {
out[i] = in[x]
}
return
case []string:
out := out.([]string)
for i, x := range indexes {
out[i] = in[x]
}
return
}
inv, outv := reflectSlice(in), reflectSlice(out)
for i, x := range indexes {
outv.Index(i).Set(inv.Index(x))
}
}
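A minimal illustrative use of Select and SelectInto (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		v := []float64{10, 20, 30}
		fmt.Println(slice.Select(v, []int{2, 0})) // [30 10]

		out := make([]float64, 2)
		slice.SelectInto(out, v, []int{1, 1})
		fmt.Println(out) // [20 20]
	}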
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"sort"
"github.com/aclements/go-gg/generic"
)
// Min returns the minimum value in v. v must either implement
// sort.Interface or its elements must be orderable. Min panics if v
// is empty.
func Min(v T) interface{} {
x, _ := minmax(v, -1, true)
return x.Interface()
}
// ArgMin returns the index of the minimum value in v. If there are
// multiple indexes equal to the minimum value, ArgMin returns the
// lowest of them. v must be a slice whose elements are orderable, or
// must implement sort.Interface. ArgMin panics if v is empty.
func ArgMin(v interface{}) int {
_, i := minmax(v, -1, false)
return i
}
// Max returns the maximum value in v. v must either implement
// sort.Interface or its elements must be orderable. Max panics if v
// is empty.
func Max(v T) interface{} {
x, _ := minmax(v, 1, true)
return x.Interface()
}
// ArgMax returns the index of the maximum value in v. If there are
// multiple indexes equal to the maximum value, ArgMax returns the
// lowest of them. v must be a slice whose elements are orderable, or
// must implement sort.Interface. ArgMax panics if v is empty.
func ArgMax(v interface{}) int {
_, i := minmax(v, 1, false)
return i
}
func minmax(v interface{}, keep int, val bool) (reflect.Value, int) {
switch v := v.(type) {
case sort.Interface:
if v.Len() == 0 {
if keep < 0 {
panic("zero-length sequence has no minimum")
} else {
panic("zero-length sequence has no maximum")
}
}
maxi := 0
if keep < 0 {
for i, len := 0, v.Len(); i < len; i++ {
if v.Less(i, maxi) {
maxi = i
}
}
} else {
for i, len := 0, v.Len(); i < len; i++ {
if v.Less(maxi, i) {
maxi = i
}
}
}
if !val {
return reflect.Value{}, maxi
}
rv := reflectSlice(v)
return rv.Index(maxi), maxi
}
rv := reflectSlice(v)
if !generic.CanOrderR(rv.Type().Elem().Kind()) {
panic(&generic.TypeError{rv.Type().Elem(), nil, "is not orderable"})
}
if rv.Len() == 0 {
if keep < 0 {
panic("zero-length slice has no minimum")
} else {
panic("zero-length slice has no maximum")
}
}
max, maxi := rv.Index(0), 0
for i, len := 1, rv.Len(); i < len; i++ {
if elt := rv.Index(i); generic.OrderR(elt, max) == keep {
max, maxi = elt, i
}
}
return max, maxi
}
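A minimal illustrative use of Min, ArgMin, and ArgMax (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		xs := []float64{3, 1, 4, 1, 5}
		fmt.Println(slice.Min(xs))    // 1
		fmt.Println(slice.ArgMin(xs)) // 1 (the first of the two minima)
		fmt.Println(slice.ArgMax(xs)) // 4
	}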
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import "reflect"
var trueVal = reflect.ValueOf(true)
// Nub returns v with duplicates removed. It keeps the first instance
// of each distinct value and preserves their order.
func Nub(v T) T {
rv := reflectSlice(v)
indexes := make([]int, 0)
set := reflect.MakeMap(reflect.MapOf(rv.Type().Elem(), trueVal.Type()))
for i, l := 0, rv.Len(); i < l; i++ {
x := rv.Index(i)
if set.MapIndex(x).IsValid() {
continue
}
set.SetMapIndex(x, trueVal)
indexes = append(indexes, i)
}
return Select(v, indexes)
}
// NubAppend is equivalent to appending all of the slices in vs and
// then calling Nub on the result, but more efficient.
func NubAppend(vs ...T) T {
if len(vs) == 0 {
return nil
}
rv := reflectSlice(vs[0])
set := reflect.MakeMap(reflect.MapOf(rv.Type().Elem(), trueVal.Type()))
out := reflect.MakeSlice(rv.Type(), 0, 0)
for _, v := range vs {
rv := reflectSlice(v)
for i, l := 0, rv.Len(); i < l; i++ {
x := rv.Index(i)
if set.MapIndex(x).IsValid() {
continue
}
set.SetMapIndex(x, trueVal)
out = reflect.Append(out, x)
}
}
return out.Interface()
}
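A minimal illustrative use of Nub and NubAppend (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		fmt.Println(slice.Nub([]string{"b", "a", "b", "c"}))   // [b a c]
		fmt.Println(slice.NubAppend([]int{1, 2}, []int{2, 3})) // [1 2 3]
	}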
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"github.com/aclements/go-gg/generic"
)
// T is a Go slice value of type []U.
//
// This is primarily for documentation. There is no way to statically
// enforce this in Go; however, functions that expect a slice will
// panic with a *generic.TypeError if passed a non-slice value.
type T interface{}
// reflectSlice checks that s is a slice and returns its
// reflect.Value. It panics with a *generic.TypeError if s is not a slice.
func reflectSlice(s T) reflect.Value {
rv := reflect.ValueOf(s)
if rv.Kind() != reflect.Slice {
panic(&generic.TypeError{rv.Type(), nil, "is not a slice"})
}
return rv
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
import (
"reflect"
"sort"
"time"
"github.com/aclements/go-gg/generic"
)
// CanSort returns whether the value v can be sorted.
func CanSort(v interface{}) bool {
switch v.(type) {
case sort.Interface, []time.Time:
return true
}
return generic.CanOrderR(reflect.TypeOf(v).Elem().Kind())
}
// Sort sorts v in increasing order. v must implement sort.Interface,
// be a slice whose elements are orderable, or be a []time.Time.
func Sort(v interface{}) {
sort.Sort(Sorter(v))
}
// Sorter returns a sort.Interface for sorting v. v must implement
// sort.Interface, be a slice whose elements are orderable, or be a
// []time.Time.
func Sorter(v interface{}) sort.Interface {
switch v := v.(type) {
case []int:
return sort.IntSlice(v)
case []float64:
return sort.Float64Slice(v)
case []string:
return sort.StringSlice(v)
case []time.Time:
return sortTimeSlice(v)
case sort.Interface:
return v
}
rv := reflectSlice(v)
switch rv.Type().Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return sortIntSlice{rv}
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return sortUintSlice{rv}
case reflect.Float32, reflect.Float64:
return sortFloatSlice{rv}
case reflect.String:
return sortStringSlice{rv}
}
panic(&generic.TypeError{rv.Type().Elem(), nil, "is not orderable"})
}
type sortIntSlice struct {
reflect.Value
}
func (s sortIntSlice) Len() int {
return s.Value.Len()
}
func (s sortIntSlice) Less(i, j int) bool {
return s.Index(i).Int() < s.Index(j).Int()
}
func (s sortIntSlice) Swap(i, j int) {
a, b := s.Index(i).Int(), s.Index(j).Int()
s.Index(i).SetInt(b)
s.Index(j).SetInt(a)
}
type sortUintSlice struct {
reflect.Value
}
func (s sortUintSlice) Len() int {
return s.Value.Len()
}
func (s sortUintSlice) Less(i, j int) bool {
return s.Index(i).Uint() < s.Index(j).Uint()
}
func (s sortUintSlice) Swap(i, j int) {
a, b := s.Index(i).Uint(), s.Index(j).Uint()
s.Index(i).SetUint(b)
s.Index(j).SetUint(a)
}
type sortFloatSlice struct {
reflect.Value
}
func (s sortFloatSlice) Len() int {
return s.Value.Len()
}
func (s sortFloatSlice) Less(i, j int) bool {
return s.Index(i).Float() < s.Index(j).Float()
}
func (s sortFloatSlice) Swap(i, j int) {
a, b := s.Index(i).Float(), s.Index(j).Float()
s.Index(i).SetFloat(b)
s.Index(j).SetFloat(a)
}
type sortStringSlice struct {
reflect.Value
}
func (s sortStringSlice) Len() int {
return s.Value.Len()
}
func (s sortStringSlice) Less(i, j int) bool {
return s.Index(i).String() < s.Index(j).String()
}
func (s sortStringSlice) Swap(i, j int) {
a, b := s.Index(i).String(), s.Index(j).String()
s.Index(i).SetString(b)
s.Index(j).SetString(a)
}
type sortTimeSlice []time.Time
func (s sortTimeSlice) Len() int { return len(s) }
func (s sortTimeSlice) Less(i, j int) bool { return s[i].Before(s[j]) }
func (s sortTimeSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
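A minimal illustrative use of Sort and CanSort (not part of the change; assumes the upstream import path):

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/generic/slice"
	)

	func main() {
		xs := []int{3, 1, 2}
		slice.Sort(xs)  // sorts in place via sort.IntSlice
		fmt.Println(xs) // [1 2 3]

		fmt.Println(slice.CanSort([]complex128{1})) // false: complex values are not orderable
	}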
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"fmt"
"reflect"
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
"github.com/aclements/go-moremath/stats"
"github.com/aclements/go-moremath/vec"
)
// TODO: AggFirst, AggTukey. StdDev?
// Agg constructs an Aggregate transform from a grouping column and a
// set of Aggregators.
//
// TODO: Does this belong in ggstat? The specific aggregator functions
// probably do, but the concept could go in package table.
func Agg(xs ...string) func(aggs ...Aggregator) Aggregate {
return func(aggs ...Aggregator) Aggregate {
return Aggregate{xs, aggs}
}
}
// Aggregate computes aggregate functions of a table grouped by
// distinct values of a column or set of columns.
//
// Aggregate first groups the table by the Xs columns. Each of these
// groups produces a single row in the output table, where the unique
// value of each of the Xs columns appears in the output row, along
// with constant columns from the input, as well as any columns that
// have a unique value within every group (they're "effectively"
// constant). Additional columns in the output row are produced by
// applying the Aggregator functions to the group.
type Aggregate struct {
// Xs is the list of column names to group values by before
// computing aggregate functions.
Xs []string
// Aggregators is the set of Aggregator functions to apply to
// each group of values.
Aggregators []Aggregator
}
// An Aggregator is a function that aggregates each group of input
// into one row and adds it to output. It may be based on multiple
// columns from input and may add multiple columns to output.
type Aggregator func(input table.Grouping, output *table.Builder)
func (s Aggregate) F(g table.Grouping) table.Grouping {
isConst := make([]bool, len(g.Columns()))
for i := range isConst {
isConst[i] = true
}
subgroups := map[table.GroupID]table.Grouping{}
for _, gid := range g.Tables() {
g := table.GroupBy(g.Table(gid), s.Xs...)
subgroups[gid] = g
for i, col := range g.Columns() {
if !isConst[i] {
continue
}
// Can this column be promoted to constant?
for _, gid2 := range g.Tables() {
t := g.Table(gid2)
isConst[i] = isConst[i] && checkConst(t, col)
}
}
}
return table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table {
g := table.GroupBy(t, s.Xs...)
var nt table.Builder
// Construct X columns.
rows := len(g.Tables())
for colidx, xcol := range s.Xs {
xs := reflect.MakeSlice(table.ColType(t, xcol), rows, rows)
for i, gid := range g.Tables() {
for j := 0; j < len(s.Xs)-colidx-1; j++ {
gid = gid.Parent()
}
xs.Index(i).Set(reflect.ValueOf(gid.Label()))
}
nt.Add(xcol, xs.Interface())
}
// Apply Aggregators.
for _, agg := range s.Aggregators {
agg(g, &nt)
}
// Keep constant and effectively constant columns.
for i := range isConst {
col := t.Columns()[i]
if !isConst[i] || nt.Has(col) {
continue
}
if cv, ok := t.Const(col); ok {
nt.AddConst(col, cv)
continue
}
ncol := reflect.MakeSlice(table.ColType(t, col), len(g.Tables()), len(g.Tables()))
for i, gid := range g.Tables() {
v := reflect.ValueOf(g.Table(gid).Column(col))
ncol.Index(i).Set(v.Index(0))
}
nt.Add(col, ncol.Interface())
}
return nt.Done()
})
}
func checkConst(t *table.Table, col string) bool {
if _, ok := t.Const(col); ok {
return true
}
v := reflect.ValueOf(t.Column(col))
if v.Len() <= 1 {
return true
}
if !v.Type().Elem().Comparable() {
return false
}
elem := v.Index(0).Interface()
for i, l := 1, v.Len(); i < l; i++ {
if elem != v.Index(i).Interface() {
return false
}
}
return true
}
// AggCount returns an aggregate function that computes the number of
// rows in each group. The resulting column will be named label, or
// "count" if label is "".
func AggCount(label string) Aggregator {
if label == "" {
label = "count"
}
return func(input table.Grouping, b *table.Builder) {
counts := make([]int, 0, len(input.Tables()))
for _, gid := range input.Tables() {
counts = append(counts, input.Table(gid).Len())
}
b.Add(label, counts)
}
}
// AggMean returns an aggregate function that computes the mean of
// each of cols. The resulting columns will be named "mean <col>" and
// will have the same type as <col>.
func AggMean(cols ...string) Aggregator {
return aggFn(stats.Mean, "mean ", cols...)
}
// AggGeoMean returns an aggregate function that computes the
// geometric mean of each of cols. The resulting columns will be named
// "geomean <col>" and will have the same type as <col>.
func AggGeoMean(cols ...string) Aggregator {
return aggFn(stats.GeoMean, "geomean ", cols...)
}
// AggMin returns an aggregate function that computes the minimum of
// each of cols. The resulting columns will be named "min <col>" and
// will have the same type as <col>.
func AggMin(cols ...string) Aggregator {
min := func(xs []float64) float64 {
x, _ := stats.Bounds(xs)
return x
}
return aggFn(min, "min ", cols...)
}
// AggMax returns an aggregate function that computes the maximum of
// each of cols. The resulting columns will be named "max <col>" and
// will have the same type as <col>.
func AggMax(cols ...string) Aggregator {
max := func(xs []float64) float64 {
_, x := stats.Bounds(xs)
return x
}
return aggFn(max, "max ", cols...)
}
// AggSum returns an aggregate function that computes the sum of each
// of cols. The resulting columns will be named "sum <col>" and will
// have the same type as <col>.
func AggSum(cols ...string) Aggregator {
return aggFn(vec.Sum, "sum ", cols...)
}
// AggQuantile returns an aggregate function that computes a quantile
// of each of cols. quantile has a range of [0,1]. The resulting
// columns will be named "<prefix> <col>" and will have the same type
// as <col>.
func AggQuantile(prefix string, quantile float64, cols ...string) Aggregator {
// "prefix" could be autogenerated (e.g. fmt.Sprintf("p%g ",
// quantile * 100)), but then the caller would need to do the
// same fmt.Sprintf to compute the column name they had just
// created. Perhaps Aggregator should provide a way to find
// the generated column names.
return aggFn(func(data []float64) float64 {
return stats.Sample{Xs: data}.Quantile(quantile)
}, prefix+" ", cols...)
}
func aggFn(f func([]float64) float64, prefix string, cols ...string) Aggregator {
ocols := make([]string, len(cols))
for i, col := range cols {
ocols[i] = prefix + col
}
return func(input table.Grouping, b *table.Builder) {
for coli, col := range cols {
means := make([]float64, 0, len(input.Tables()))
var xs []float64
var ct reflect.Type
for i, gid := range input.Tables() {
v := input.Table(gid).MustColumn(col)
if i == 0 {
ct = reflect.TypeOf(v)
}
slice.Convert(&xs, v)
means = append(means, f(xs))
}
if ct == float64SliceType {
b.Add(ocols[coli], means)
} else {
// Convert means back to the type of col.
outptr := reflect.New(ct)
slice.Convert(outptr.Interface(), means)
b.Add(ocols[coli], outptr.Elem().Interface())
}
}
}
}
// AggUnique returns an aggregate function that retains the unique value of
// each of cols within each aggregate group, or panics if some group
// contains more than one value for one of these columns.
//
// Note that Aggregate will automatically retain columns that happen
// to be unique. AggUnique can be used to enforce at aggregation time
// that certain columns *must* be unique (and get a nice error if they
// are not).
func AggUnique(cols ...string) Aggregator {
return func(input table.Grouping, b *table.Builder) {
if len(cols) == 0 {
return
}
if len(input.Tables()) == 0 {
panic(fmt.Sprintf("unknown column: %q", cols[0]))
}
for _, col := range cols {
ctype := table.ColType(input, col)
rows := len(input.Tables())
vs := reflect.MakeSlice(ctype, rows, rows)
for i, gid := range input.Tables() {
// Get values in this column.
xs := reflect.ValueOf(input.Table(gid).MustColumn(col))
// Check for uniqueness.
if xs.Len() == 0 {
panic(fmt.Sprintf("cannot AggUnique empty column %q", col))
}
uniquev := xs.Index(0)
unique := uniquev.Interface()
for i, len := 1, xs.Len(); i < len; i++ {
other := xs.Index(i).Interface()
if unique != other {
panic(fmt.Sprintf("column %q is not unique; contains at least %v and %v", col, unique, other))
}
}
// Store unique value.
vs.Index(i).Set(uniquev)
}
// Add unique values slice to output table.
b.Add(col, vs.Interface())
}
}
}
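For illustration (not part of the change), a minimal sketch of how Agg composes a grouping column with an Aggregator, assuming the upstream go-gg import paths; the benchmark names and values are made up:

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/ggstat"
		"github.com/aclements/go-gg/table"
	)

	func main() {
		var b table.Builder
		b.Add("name", []string{"BenchmarkFoo", "BenchmarkFoo", "BenchmarkBar"})
		b.Add("ns/op", []float64{10, 12, 100})

		// Group rows by "name" and compute the mean of "ns/op" per group;
		// the result has one row per benchmark and a "mean ns/op" column.
		out := ggstat.Agg("name")(ggstat.AggMean("ns/op")).F(b.Done())
		for _, gid := range out.Tables() {
			t := out.Table(gid)
			fmt.Println(t.Column("name"), t.Column("mean ns/op"))
		}
	}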
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"math"
"reflect"
"sort"
"github.com/aclements/go-gg/generic"
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
"github.com/aclements/go-moremath/vec"
)
// XXX If this is just based on the number of bins, it can come up
// with really ugly boundary numbers. If the bin width is specified,
// then you could also specify the left edge and bins will be placed
// at [align+width*N, align+width*(N+1)]. ggplot2 also lets you
// specify the center alignment.
//
// XXX In Matlab and NumPy, bins are open on the right *except* for
// the last bin, which is closed on both.
//
// XXX Number of bins/bin width/specify boundaries, same bins across
// all groups/separate for each group/based on shared scales (don't
// have that information here), relative or absolute histogram (Matlab
// has lots more).
//
// XXX Scale transform.
//
// The result of Bin has two columns in addition to constant columns from the input:
//
// - Column X is the left edge of the bin.
//
// - Column W is the sum of the rows' weights, or column "count" is
// the number of rows in the bin.
type Bin struct {
// X is the name of the column to use for samples.
X string
// W is the optional name of the column to use for sample
// weights. It may be "" to weight each sample as 1.
W string
// Width controls how wide each bin should be. If not provided
// or 0, a width will be chosen to produce 30 bins. If X is an
// integer column, this width will be treated as an integer as
// well.
Width float64
// Center controls the center point of each bin. To center on
// integers, for example, you could use {Width: 1, Center:
// 0}.
// XXX What does center mean for integers? Should an unspecified center yield an autochosen one, or 0?
//Center float64
// Breaks is the set of break points to use as boundaries
// between bins. The interval of each bin is [Breaks[i],
// Breaks[i+1]). Data points before the first break are
// dropped. If provided, Width and Center are ignored.
Breaks table.Slice
// SplitGroups indicates that each group in the table should
// have separate bounds based on the data in that group alone.
// The default, false, indicates that the binning function
// should use the bounds of all of the data combined. This
// makes it easier to compare bins across groups.
SplitGroups bool
}
func (b Bin) F(g table.Grouping) table.Grouping {
breaks := reflect.ValueOf(b.Breaks)
agg := AggCount("count")
if b.W != "" {
agg = aggFn(vec.Sum, "", b.W)
}
if !breaks.IsValid() && !b.SplitGroups {
breaks = b.computeBreaks(g)
}
// Change b.X to the start of the bin.
g = table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table {
breaks := breaks
if !breaks.IsValid() {
breaks = b.computeBreaks(t)
}
nbreaks := breaks.Len()
in := reflect.ValueOf(t.MustColumn(b.X))
nin := in.Len()
out := reflect.MakeSlice(breaks.Type(), nin, nin)
var found []int
for i := 0; i < nin; i++ {
elt := in.Index(i)
bin := sort.Search(nbreaks, func(j int) bool {
return generic.OrderR(elt, breaks.Index(j)) < 0
})
// 0 means the row doesn't fit on the front
// XXX Allow configuring the first and last bin as infinite or not.
bin = bin - 1
if bin >= 0 {
found = append(found, i)
out.Index(i).Set(breaks.Index(bin))
}
}
var nt table.Builder
for _, col := range t.Columns() {
if col == b.X {
nt.Add(col, slice.Select(out.Interface(), found))
} else if c, ok := t.Const(col); ok {
nt.AddConst(col, c)
} else {
nt.Add(col, slice.Select(t.Column(col), found))
}
}
return nt.Done()
})
// Group by the found bin
return Agg(b.X)(agg).F(g)
}
func (b Bin) computeBreaks(g table.Grouping) reflect.Value {
var cols []slice.T
for _, gid := range g.Tables() {
cols = append(cols, g.Table(gid).MustColumn(b.X))
}
data := slice.Concat(cols...)
min := slice.Min(data)
max := slice.Max(data)
rv := reflect.ValueOf(min)
switch rv.Type().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
min, max := rv.Int(), reflect.ValueOf(max).Int()
width := int64(b.Width)
if width == 0 {
width = (max - min) / 30
if width < 1 {
width = 1
}
}
// XXX: This assumes boundaries should be aligned with
// 0. We should support explicit Center or Boundary
// requests.
min -= (min % width)
var breaks []int64
for i := min; i < max; i += width {
breaks = append(breaks, i)
}
outs := reflect.New(reflect.ValueOf(cols[0]).Type())
slice.Convert(outs.Interface(), breaks)
return outs.Elem()
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
min, max := rv.Uint(), reflect.ValueOf(max).Uint()
width := uint64(b.Width)
if width == 0 {
width = (max - min) / 30
if width < 1 {
width = 1
}
}
min -= (min % width)
var breaks []uint64
for i := min; i < max; i += width {
breaks = append(breaks, i)
}
outs := reflect.New(reflect.ValueOf(cols[0]).Type())
slice.Convert(outs.Interface(), breaks)
return outs.Elem()
case reflect.Float32, reflect.Float64:
min, max := rv.Float(), reflect.ValueOf(max).Float()
width := b.Width
if width == 0 {
width = (max - min) / 30
if width == 0 {
width = 1
}
}
min -= math.Mod(min, width)
var breaks []float64
for i := min; i < max; i += width {
breaks = append(breaks, i)
}
outs := reflect.New(reflect.ValueOf(cols[0]).Type())
slice.Convert(outs.Interface(), breaks)
return outs.Elem()
default:
panic("can't compute breaks for unknown type")
}
}
// TODO: Count for categorical data.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import "reflect"
var float64Type = reflect.TypeOf(float64(0))
var float64SliceType = reflect.TypeOf([]float64(nil))
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
"github.com/aclements/go-moremath/stats"
"github.com/aclements/go-moremath/vec"
)
// TODO: Default to first (and second) column for X (and Y)?
// Density constructs a probability density estimate from a set of
// samples using kernel density estimation.
//
// X is the only required field. All other fields have reasonable
// default zero values.
//
// The result of Density has three columns in addition to constant
// columns from the input:
//
// - Column X is the points at which the density estimate is sampled.
//
// - Column "probability density" is the density estimate.
//
// - Column "cumulative density" is the cumulative density estimate.
type Density struct {
// X is the name of the column to use for samples.
X string
// W is the optional name of the column to use for sample
// weights. It may be "" to uniformly weight samples.
W string
// N is the number of points to sample the KDE at. If N is 0,
// a reasonable default is used.
//
// TODO: This is particularly sensitive to the scale
// transform.
//
// TODO: Base the default on the bandwidth. If the bandwidth
// is really narrow, we may need a lot of samples to exceed
// the Nyquist rate.
N int
// Domain specifies the domain at which to sample this function.
// If Domain is nil, it defaults to DomainData{}.
Domain FunctionDomainer
// Kernel is the kernel to use for the KDE.
Kernel stats.KDEKernel
// Bandwidth is the bandwidth to use for the KDE.
//
// If this is zero, the bandwidth is computed from the data
// using a default bandwidth estimator (currently
// stats.BandwidthScott).
Bandwidth float64
// BoundaryMethod is the boundary correction method to use for
// the KDE. The default value is BoundaryReflect; however, the
// default bounds are effectively +/-inf, which is equivalent
// to performing no boundary correction.
BoundaryMethod stats.KDEBoundaryMethod
// [BoundaryMin, BoundaryMax) specify a bounded support for
// the KDE. If both are 0 (their default values), they are
// treated as +/-inf.
//
// To specify a half-bounded support, set Min to math.Inf(-1)
// or Max to math.Inf(1).
BoundaryMin float64
BoundaryMax float64
}
func (d Density) F(g table.Grouping) table.Grouping {
kde := stats.KDE{
Kernel: d.Kernel,
Bandwidth: d.Bandwidth,
BoundaryMethod: d.BoundaryMethod,
BoundaryMin: d.BoundaryMin,
BoundaryMax: d.BoundaryMax,
}
dname, cname := "probability density", "cumulative density"
addEmpty := func(out *table.Builder) {
out.Add(dname, []float64{})
out.Add(cname, []float64{})
}
return Function{
X: d.X, N: d.N, Domain: d.Domain,
Fn: func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder) {
if len(sampleAt) == 0 {
addEmpty(out)
return
}
// Get input sample.
var sample stats.Sample
slice.Convert(&sample.Xs, in.MustColumn(d.X))
if d.W != "" {
slice.Convert(&sample.Weights, in.MustColumn(d.W))
if sample.Weight() == 0 {
addEmpty(out)
return
}
}
// Compute KDE.
kde.Sample = sample
if d.Bandwidth == 0 {
kde.Bandwidth = stats.BandwidthScott(sample)
}
out.Add(dname, vec.Map(kde.PDF, sampleAt))
out.Add(cname, vec.Map(kde.CDF, sampleAt))
},
}.F(g)
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"math"
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
"github.com/aclements/go-moremath/stats"
)
// A FunctionDomainer computes the domain over which to evaluate a
// statistical function.
type FunctionDomainer interface {
// FunctionDomain computes the domain of a particular column
// within a table. It takes a Grouping and a column in that
// Grouping to compute the domain of and returns a function
// that computes the domain for a specific group in the
// Grouping. This makes it possible for FunctionDomain to
// easily compute either Grouping-wide domains, or per-Table
// domains.
//
// The returned domain may be (NaN, NaN) to indicate that
// there is no data and the domain is vacuous.
FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64)
}
// DomainFixed is a FunctionDomainer that returns a fixed domain.
type DomainFixed struct {
Min, Max float64
}
var _ FunctionDomainer = DomainFixed{}
func (r DomainFixed) FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64) {
return func(table.GroupID) (min, max float64) {
return r.Min, r.Max
}
}
// DomainData is a FunctionDomainer that computes domains based on the
// bounds of the data.
type DomainData struct {
// Widen expands the domain by Widen times the span of the
// data.
//
// A value of 1.0 means to use exactly the bounds of the data.
// If Widen is 0, it is treated as 1.1 (that is, widen the
// domain by 10%, or 5% on the left and 5% on the right).
Widen float64
// SplitGroups indicates that each group in the table should
// have a separate domain based on the data in that group
// alone. The default, false, indicates that the domain should
// be based on all of the data in the table combined. This
// makes it possible to stack functions and easier to compare
// them across groups.
SplitGroups bool
}
var _ FunctionDomainer = DomainData{}
const defaultWiden = 1.1
func (r DomainData) FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64) {
widen := r.Widen
if widen <= 0 {
widen = defaultWiden
}
var xs []float64
if !r.SplitGroups {
// Compute combined bounds.
gmin, gmax := math.NaN(), math.NaN()
for _, gid := range g.Tables() {
t := g.Table(gid)
slice.Convert(&xs, t.MustColumn(col))
xmin, xmax := stats.Bounds(xs)
if xmin < gmin || math.IsNaN(gmin) {
gmin = xmin
}
if xmax > gmax || math.IsNaN(gmax) {
gmax = xmax
}
}
// Widen bounds.
span := gmax - gmin
gmin, gmax = gmin-span*(widen-1)/2, gmax+span*(widen-1)/2
return func(table.GroupID) (min, max float64) {
return gmin, gmax
}
}
return func(gid table.GroupID) (min, max float64) {
// Compute bounds.
slice.Convert(&xs, g.Table(gid).MustColumn(col))
min, max = stats.Bounds(xs)
// Widen bounds.
span := max - min
min, max = min-span*(widen-1)/2, max+span*(widen-1)/2
return
}
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
"github.com/aclements/go-moremath/vec"
)
// ECDF constructs an empirical CDF from a set of samples.
//
// X is the only required field. All other fields have reasonable
// default zero values.
//
// The result of ECDF has three columns in addition to constant
// columns from the input. The names of the columns depend on whether
// Label is "".
//
// - Column X is the points at which the CDF changes (a subset of the
// samples).
//
// - Column "cumulative density" or "cumulative density of <label>" is
// the cumulative density estimate.
//
// - Column "cumulative count" (if W and Label are ""), "cumulative
// weight" (if W is not "", but Label is "") or "cumulative <label>"
// (if Label is not "") is the cumulative count or weight of samples.
// That is, cumulative density times the total weight of the samples.
type ECDF struct {
// X is the name of the column to use for samples.
X string
// W is the optional name of the column to use for sample
// weights. It may be "" to uniformly weight samples.
W string
// Label, if not "", gives a label for the samples. It is used
// to construct more specific names for the output columns. It
// should be a plural noun.
Label string
// Domain specifies the domain of the returned ECDF. If the
// domain is wider than the bounds of the data in a group,
// ECDF will add a point below the smallest sample and above
// the largest sample to make the 0 and 1 levels clear. If
// Domain is nil, it defaults to DomainData{}.
Domain FunctionDomainer
}
func (s ECDF) F(g table.Grouping) table.Grouping {
// Set defaults.
if s.Domain == nil {
s.Domain = DomainData{}
}
// Construct output column names.
dname, cname := "cumulative density", "cumulative count"
if s.Label != "" {
dname += " of " + s.Label
cname = "cumulative " + s.Label
} else if s.W != "" {
cname = "cumulative weight"
}
g = table.SortBy(g, s.X)
domain := s.Domain.FunctionDomain(g, s.X)
return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table {
// Get input columns.
var xs, ws []float64
slice.Convert(&xs, t.MustColumn(s.X))
if s.W != "" {
slice.Convert(&ws, t.MustColumn(s.W))
}
// Ignore empty tables.
if len(xs) == 0 {
nt := new(table.Builder).Add(s.X, []float64{}).Add(cname, []float64{}).Add(dname, []float64{})
preserveConsts(nt, t)
return nt.Done()
}
// Get domain.
min, max := domain(gid)
// Create output columns.
xo, do, co := make([]float64, 0), make([]float64, 0), make([]float64, 0)
if min < xs[0] {
// Extend to the left.
xo = append(xo, min)
do = append(do, 0)
co = append(co, 0)
}
// Compute total weight.
var total float64
if ws == nil {
total = float64(t.Len())
} else {
total = vec.Sum(ws)
}
// Create ECDF.
cum := 0.0
for i := 0; i < len(xs); {
j := i
for j < len(xs) && xs[i] == xs[j] {
if ws == nil {
cum += 1
} else {
cum += ws[j]
}
j++
}
xo = append(xo, xs[i])
do = append(do, cum/total)
co = append(co, cum)
i = j
}
if xs[len(xs)-1] < max {
// Extend to the right.
xo = append(xo, max)
do = append(do, 1)
co = append(co, cum)
}
// Construct results table.
nt := new(table.Builder).Add(s.X, xo).Add(dname, do).Add(cname, co)
preserveConsts(nt, t)
return nt.Done()
})
}
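For illustration (not part of the change), a minimal sketch with made-up samples, assuming the upstream go-gg import paths:

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/ggstat"
		"github.com/aclements/go-gg/table"
	)

	func main() {
		var b table.Builder
		b.Add("latency", []float64{1, 1, 2, 5})

		// The CDF steps from 0 to 1, with extra points at the widened
		// domain edges so the 0 and 1 levels are visible.
		g := ggstat.ECDF{X: "latency"}.F(b.Done())
		for _, gid := range g.Tables() {
			t := g.Table(gid)
			fmt.Println(t.Column("latency"), t.Column("cumulative density"))
		}
	}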
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"math"
"reflect"
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
"github.com/aclements/go-moremath/vec"
)
// Function samples a continuous univariate function at N points in
// the domain computed by Domain.
//
// The result of Function binds column X to the X values at which the
// function is sampled and retains constant columns from the input.
// The computed function can add arbitrary columns for its output.
type Function struct {
// X is the name of the column to use for input domain of this
// function.
X string
// N is the number of points to sample the function at. If N
// is 0, a reasonable default is used.
N int
// Domain specifies the domain over which to sample this function.
// If Domain is nil, it defaults to DomainData{}.
Domain FunctionDomainer
// Fn is the continuous univariate function to sample. Fn will
// be called with each table in the grouping and the X values
// at which it should be sampled. Fn must add its output
// columns to out. The output table will already contain the
// sample points bound to the X column.
Fn func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder)
}
const defaultFunctionSamples = 200
func (f Function) F(g table.Grouping) table.Grouping {
// Set defaults.
if f.N <= 0 {
f.N = defaultFunctionSamples
}
if f.Domain == nil {
f.Domain = DomainData{}
}
domain := f.Domain.FunctionDomain(g, f.X)
return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table {
min, max := domain(gid)
// Compute sample points. If there's no data, there
// are no sample points, but we still have to run the
// function to get the right output columns.
var ss []float64
if math.IsNaN(min) {
ss = []float64{}
} else {
ss = vec.Linspace(min, max, f.N)
}
var nt table.Builder
ctype := table.ColType(t, f.X)
if ctype == float64Type {
// Bind output X column.
nt.Add(f.X, ss)
} else {
// Convert to the column type.
vsp := reflect.New(ctype)
slice.Convert(vsp.Interface(), ss)
vs := vsp.Elem()
// This may have produced duplicate values.
// Eliminate those.
if vs.Len() > 0 {
prev, i := vs.Index(0).Interface(), 1
for j := 1; j < vs.Len(); j++ {
next := vs.Index(j).Interface()
if prev == next {
// Skip duplicate.
continue
}
if i != j {
vs.Index(i).Set(vs.Index(j))
}
i++
prev = next
}
vs.SetLen(i)
}
// Bind column-typed values to output X.
nt.Add(f.X, vs.Interface())
// And convert back to []float64 so we can
// apply the function.
slice.Convert(&ss, vs.Interface())
}
// Apply the function to the sample points.
f.Fn(gid, t, ss, &nt)
preserveConsts(&nt, t)
return nt.Done()
})
}
// preserveConsts copies the constant columns from t into nt.
func preserveConsts(nt *table.Builder, t *table.Table) {
for _, col := range t.Columns() {
if nt.Has(col) {
// Don't overwrite existing columns in nt.
continue
}
if cv, ok := t.Const(col); ok {
nt.AddConst(col, cv)
}
}
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ggstat
import (
"reflect"
"github.com/aclements/go-gg/generic/slice"
"github.com/aclements/go-gg/table"
)
// Normalize normalizes each group such that some data point is 1.
//
// Either X or Index is required (though 0 is a reasonable value of
// Index).
//
// The result of Normalize is the same as the input table, plus
// additional columns for each normalized column. These columns will
// be named "normalized <col>" where <col> is the name of the original
// column and will have type []float64.
type Normalize struct {
// X is the name of the column to use to find the denominator
// row. If X is "", Index is used instead.
X string
// Index is the row index of the denominator row if X is ""
// (otherwise it is ignored). Index may be negative, in which
// case it is added to the number of rows (e.g., -1 is the
// last row).
Index int
// By is a function func([]T) int that returns the index of
// the denominator row given column X. By may be nil, in which
// case it defaults to generic.ArgMin.
By interface{}
// Cols is a slice of the names of columns to normalize
// relative to the corresponding DenomCols value in the
// denominator row. Cols may be nil, in which case it defaults
// to all integral and floating point columns.
Cols []string
// DenomCols is a slice of the names of columns used as the
// denominator. DenomCols may be nil, in which case it
// defaults to Cols (i.e. each column will be normalized to
// the value from that column in the denominator row.)
// Otherwise, DenomCols must be the same length as Cols.
DenomCols []string
}
func (s Normalize) F(g table.Grouping) table.Grouping {
// Find the columns to normalize.
if s.Cols == nil {
cols := []string{}
for i, ct := range colTypes(g) {
if canNormalize(ct.Elem().Kind()) {
cols = append(cols, g.Columns()[i])
}
}
s.Cols = cols
}
if len(s.Cols) == 0 {
return g
}
// Construct new column names.
newcols := make([]string, len(s.Cols))
for i, col := range s.Cols {
newcols[i] = "normalized " + col
}
// Get "by" function.
var byv reflect.Value
byargs := make([]reflect.Value, 1)
if s.By != nil {
byv = reflect.ValueOf(s.By)
// TODO: Type check byv better.
}
return table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table {
if t.Len() == 0 {
return t
}
// Find the denominator row.
var drow int
if s.X == "" {
drow = s.Index
if drow < 0 {
drow += t.Len()
}
} else {
xs := t.MustColumn(s.X)
if s.By == nil {
drow = slice.ArgMin(xs)
} else {
byargs[0] = reflect.ValueOf(xs)
byout := byv.Call(byargs)
drow = int(byout[0].Int())
}
}
// Normalize columns.
newt := table.NewBuilder(t)
denomCols := s.DenomCols
if denomCols == nil {
denomCols = s.Cols
}
for coli, col := range s.Cols {
denom := denomValue(t.MustColumn(denomCols[coli]), drow)
out := normalizeTo(t.MustColumn(col), denom)
newt.Add(newcols[coli], out)
}
return newt.Done()
})
}
func colTypes(g table.Grouping) []reflect.Type {
cts := make([]reflect.Type, len(g.Columns()))
for i, col := range g.Columns() {
cts[i] = table.ColType(g, col)
}
return cts
}
var canNormalizeKinds = map[reflect.Kind]bool{
reflect.Float32: true,
reflect.Float64: true,
reflect.Int: true,
reflect.Int8: true,
reflect.Int16: true,
reflect.Int32: true,
reflect.Int64: true,
reflect.Uint: true,
reflect.Uintptr: true,
reflect.Uint8: true,
reflect.Uint16: true,
reflect.Uint32: true,
reflect.Uint64: true,
}
func canNormalize(k reflect.Kind) bool {
return canNormalizeKinds[k]
}
func denomValue(s interface{}, index int) float64 {
switch s := s.(type) {
case []float64:
return s[index]
}
return reflect.ValueOf(s).Index(index).Convert(float64Type).Float()
}
func normalizeTo(s interface{}, denom float64) interface{} {
switch s := s.(type) {
case []float64:
out := make([]float64, len(s))
for i, numer := range s {
out[i] = numer / denom
}
return out
}
sv := reflect.ValueOf(s)
out := reflect.MakeSlice(float64SliceType, sv.Len(), sv.Len())
for i, len := 0, sv.Len(); i < len; i++ {
numer := sv.Index(i).Convert(float64Type).Float()
out.Index(i).SetFloat(numer / denom)
}
return out.Interface()
}
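To make the Normalize contract above concrete, here is a small illustrative sketch; it is not part of this change. The ggstat import path is inferred from the package name and the sibling go-gg paths, and the benchmark numbers are invented.

package main

import (
	"github.com/aclements/go-gg/ggstat"
	"github.com/aclements/go-gg/table"
)

func main() {
	// Three measurements of one benchmark across commits.
	var b table.Builder
	b.Add("commit", []int{1, 2, 3})
	b.Add("ns/op", []float64{120, 90, 150})
	t := b.Done()

	// Normalize "ns/op" to the row with the smallest "commit"
	// (By is nil, so the denominator row is the argmin of X).
	// This adds a "normalized ns/op" column: 1.0, 0.75, 1.25.
	g := ggstat.Normalize{X: "commit", Cols: []string{"ns/op"}}.F(t)
	table.Print(g)
}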
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"fmt"
"github.com/aclements/go-gg/generic/slice"
)
// Concat returns the concatenation of the rows in each matching group
// across gs. All Groupings in gs must have the same set of columns
// (though they need not be in the same order; the column order from
// gs[0] will be used). The GroupIDs in the returned Grouping will be
// the union of the GroupIDs in gs.
func Concat(gs ...Grouping) Grouping {
if len(gs) == 0 {
return new(Table)
}
// Check that all Groupings have the same set of columns. They
// can be in different orders.
colSet := map[string]bool{}
for _, col := range gs[0].Columns() {
colSet[col] = true
}
for i, g2 := range gs[1:] {
diff := len(g2.Columns()) != len(colSet)
if !diff {
for _, col := range g2.Columns() {
if !colSet[col] {
diff = true
break
}
}
}
if diff {
panic(fmt.Sprintf("columns in Groupings 0 and %d differ: %q vs %q", i+1, gs[0].Columns(), g2.Columns()))
}
}
// Collect group IDs.
haveGID := map[GroupID]bool{}
gids := []GroupID{}
for _, g := range gs {
for _, gid := range g.Tables() {
if haveGID[gid] {
continue
}
haveGID[gid] = true
gids = append(gids, gid)
}
}
// Build output groups.
var ng GroupingBuilder
for _, gid := range gids {
// Build output table.
var nt Builder
var cols []slice.T
for _, col := range gs[0].Columns() {
// Is it constant?
isConst := false
var cv interface{}
for _, g := range gs {
t := g.Table(gid)
if t == nil {
continue
}
if cv1, ok := t.Const(col); ok {
if !isConst {
isConst = true
cv = cv1
} else if cv != cv1 {
isConst = false
break
}
} else {
isConst = false
break
}
}
if isConst {
nt.AddConst(col, cv)
continue
}
// Not a constant. Collect slices.
for _, g := range gs {
t := g.Table(gid)
if t == nil {
continue
}
cols = append(cols, t.Column(col))
}
nt.Add(col, slice.Concat(cols...))
cols = cols[:0]
}
ng.Add(gid, nt.Done())
}
return ng.Done()
}
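A minimal illustration of Concat, not part of this change: the two tables share a column set, so their rows are simply stacked. Names and values are made up.

package main

import "github.com/aclements/go-gg/table"

func main() {
	var b1, b2 table.Builder
	b1.Add("name", []string{"BenchmarkA"})
	b1.Add("ns/op", []float64{120})
	b2.Add("name", []string{"BenchmarkB"})
	b2.Add("ns/op", []float64{300})

	// One grouping with two rows, in gs[0]'s column order.
	table.Print(table.Concat(b1.Done(), b2.Done()))
}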
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"fmt"
"reflect"
"github.com/aclements/go-gg/generic/slice"
)
var boolType = reflect.TypeOf(false)
// Filter filters g to only rows where pred returns true. pred must be
// a function that returns bool and takes len(cols) arguments, where
// the element type of column cols[i] is assignable to argument i.
//
// TODO: Create a faster batch variant where pred takes slices.
func Filter(g Grouping, pred interface{}, cols ...string) Grouping {
// TODO: Use generic.TypeError.
predv := reflect.ValueOf(pred)
predt := predv.Type()
if predt.Kind() != reflect.Func || predt.NumIn() != len(cols) || predt.NumOut() != 1 || predt.Out(0) != boolType {
panic("predicate function must be func(col[0], col[1], ...) bool")
}
if len(cols) == 0 {
return g
}
if len(g.Tables()) == 0 {
panic(fmt.Sprintf("unknown column %q", cols[0]))
}
// Type check arguments.
for i, col := range cols {
colt := ColType(g, col)
if !colt.Elem().AssignableTo(predt.In(i)) {
panic(fmt.Sprintf("column %d (type %s) is not assignable to predicate argument %d (type %s)", i, colt.Elem(), i, predt.In(i)))
}
}
args := make([]reflect.Value, len(cols))
colvs := make([]reflect.Value, len(cols))
match := make([]int, 0)
return MapTables(g, func(_ GroupID, t *Table) *Table {
// Get columns.
for i, col := range cols {
colvs[i] = reflect.ValueOf(t.MustColumn(col))
}
// Find the set of row indexes that satisfy pred.
match = match[:0]
for r, len := 0, t.Len(); r < len; r++ {
for c, colv := range colvs {
args[c] = colv.Index(r)
}
if predv.Call(args)[0].Bool() {
match = append(match, r)
}
}
// Create the new table.
if len(match) == t.Len() {
return t
}
var nt Builder
for _, col := range t.Columns() {
nt.Add(col, slice.Select(t.Column(col), match))
}
return nt.Done()
})
}
// FilterEq filters g to only rows where the value in col equals val.
func FilterEq(g Grouping, col string, val interface{}) Grouping {
match := make([]int, 0)
return MapTables(g, func(_ GroupID, t *Table) *Table {
// Find the set of row indexes that match val.
seq := t.MustColumn(col)
match = match[:0]
rv := reflect.ValueOf(seq)
for i, len := 0, rv.Len(); i < len; i++ {
if rv.Index(i).Interface() == val {
match = append(match, i)
}
}
var nt Builder
for _, col := range t.Columns() {
nt.Add(col, slice.Select(t.Column(col), match))
}
return nt.Done()
})
}
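For reference, a short sketch of Filter and FilterEq (illustrative only, not part of this change). The predicate takes one argument per listed column, matching the doc comment above.

package main

import "github.com/aclements/go-gg/table"

func main() {
	var b table.Builder
	b.Add("name", []string{"BenchmarkA", "BenchmarkB", "BenchmarkC"})
	b.Add("ns/op", []float64{120, 300, 90})
	t := b.Done()

	// Rows whose "ns/op" is under 200.
	fast := table.Filter(t, func(ns float64) bool { return ns < 200 }, "ns/op")

	// FilterEq is the common single-value special case.
	onlyB := table.FilterEq(t, "name", "BenchmarkB")

	table.Print(fast)
	table.Print(onlyB)
}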
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"fmt"
"reflect"
"strings"
"github.com/aclements/go-gg/generic/slice"
)
// GroupID identifies a group. GroupIDs form a tree, rooted at
// RootGroupID (which is also the zero GroupID).
type GroupID struct {
*groupNode
}
// RootGroupID is the root of the GroupID tree.
var RootGroupID = GroupID{}
type groupNode struct {
parent GroupID
label interface{}
}
// String returns the path to GroupID g in the form "/l1/l2/l3". If g
// is RootGroupID, it returns "/". Each level in the group is formed
// by formatting the label using fmt's "%v" verb. Note that this is
// purely diagnostic; this string may not uniquely identify g.
func (g GroupID) String() string {
if g == RootGroupID {
return "/"
}
parts := []string{}
for p := g; p != RootGroupID; p = p.parent {
part := fmt.Sprintf("/%v", p.label)
parts = append(parts, part)
}
for i, j := 0, len(parts)-1; i < j; i, j = i+1, j-1 {
parts[i], parts[j] = parts[j], parts[i]
}
return strings.Join(parts, "")
}
// Extend returns a new GroupID that is a child of GroupID g. The
// returned GroupID will not be equal to any existing GroupID (even if
// label is not unique among g's children). The label is primarily
// diagnostic; the table package uses it only when printing tables,
// but callers may store semantic information in group labels.
func (g GroupID) Extend(label interface{}) GroupID {
return GroupID{&groupNode{g, label}}
}
// Parent returns the parent of g. The parent of RootGroupID is
// RootGroupID.
func (g GroupID) Parent() GroupID {
if g == RootGroupID {
return RootGroupID
}
return g.parent
}
// Label returns the label of g.
func (g GroupID) Label() interface{} {
return g.label
}
// GroupBy sub-divides all groups such that all of the rows in each
// group have equal values for all of the named columns. The relative
// order of rows with equal values for the named columns is
// maintained. Grouped-by columns become constant columns within each
// group.
func GroupBy(g Grouping, cols ...string) Grouping {
// TODO: This would generate much less garbage if we grouped
// all of cols in one pass.
//
// TODO: This constructs one slice per column per input group,
// but it would be even better if it constructed just one
// slice per column.
if len(cols) == 0 {
return g
}
var out GroupingBuilder
for _, gid := range g.Tables() {
t := g.Table(gid)
if cv, ok := t.Const(cols[0]); ok {
// Grouping by a constant is trivial.
subgid := gid.Extend(cv)
out.Add(subgid, t)
continue
}
c := t.MustColumn(cols[0])
// Create an index on c.
type subgroupInfo struct {
key interface{}
rows []int
}
subgroups := []subgroupInfo{}
keys := make(map[interface{}]int)
seq := reflect.ValueOf(c)
for i := 0; i < seq.Len(); i++ {
x := seq.Index(i).Interface()
sg, ok := keys[x]
if !ok {
sg = len(subgroups)
subgroups = append(subgroups, subgroupInfo{x, []int{}})
keys[x] = sg
}
subgroup := &subgroups[sg]
subgroup.rows = append(subgroup.rows, i)
}
// Count rows in each subgroup.
offsets := make([]int, 1+len(subgroups))
for i := range subgroups {
offsets[i+1] = offsets[i] + len(subgroups[i].rows)
}
// Split each column.
builders := make([]Builder, len(subgroups))
for _, name := range t.Columns() {
if name == cols[0] {
// Promote the group-by column to a
// constant.
for i := range subgroups {
builders[i].AddConst(name, subgroups[i].key)
}
continue
}
if cv, ok := t.Const(name); ok {
// Keep constants constant.
for i := range builders {
builders[i].AddConst(name, cv)
}
continue
}
// Create a slice for all of the values.
col := t.Column(name)
ncol := reflect.MakeSlice(reflect.TypeOf(col), t.Len(), t.Len())
// Shuffle each subgroup into ncol.
for i := range subgroups {
subcol := ncol.Slice(offsets[i], offsets[i+1]).Interface()
slice.SelectInto(subcol, col, subgroups[i].rows)
builders[i].Add(name, subcol)
}
}
// Add tables to output Grouping.
for i := range builders {
subgid := gid.Extend(subgroups[i].key)
out.Add(subgid, builders[i].Done())
}
}
return GroupBy(out.Done(), cols[1:]...)
}
// Ungroup concatenates adjacent Tables in g that share a group parent
// into a Table identified by the parent, undoing the effects of the
// most recent GroupBy operation.
func Ungroup(g Grouping) Grouping {
groups := g.Tables()
if len(groups) == 0 || len(groups) == 1 && groups[0] == RootGroupID {
return g
}
var out GroupingBuilder
runGid := groups[0].Parent()
runTabs := []*Table{}
for _, gid := range groups {
if gid.Parent() != runGid {
// Flush the run.
out.Add(runGid, concatRows(runTabs...))
runGid = gid.Parent()
runTabs = runTabs[:0]
}
runTabs = append(runTabs, g.Table(gid))
}
// Flush the last run.
out.Add(runGid, concatRows(runTabs...))
return out.Done()
}
// Flatten concatenates all of the groups in g into a single Table.
// This is equivalent to repeatedly Ungrouping g.
func Flatten(g Grouping) *Table {
groups := g.Tables()
switch len(groups) {
case 0:
return new(Table)
case 1:
return g.Table(groups[0])
}
tabs := make([]*Table, len(groups))
for i, gid := range groups {
tabs[i] = g.Table(gid)
}
return concatRows(tabs...)
}
// concatRows concatenates the rows of tabs into a single Table. All
// Tables in tabs must have the same column set.
func concatRows(tabs ...*Table) *Table {
// TODO: Consider making this public. It would have to check
// the columns, and we would probably also want a concatCols.
switch len(tabs) {
case 0:
return new(Table)
case 1:
return tabs[0]
}
// Construct each column.
var out Builder
seqs := make([]slice.T, len(tabs))
for _, col := range tabs[0].Columns() {
seqs = seqs[:0]
for _, tab := range tabs {
seqs = append(seqs, tab.Column(col))
}
out.Add(col, slice.Concat(seqs...))
}
return out.Done()
}
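A hypothetical GroupBy/Flatten round trip, not part of this change, using invented columns.

package main

import (
	"fmt"

	"github.com/aclements/go-gg/table"
)

func main() {
	var b table.Builder
	b.Add("branch", []string{"master", "dev", "master"})
	b.Add("ns/op", []float64{120, 95, 130})
	t := b.Done()

	// One sub-table per distinct "branch" value; the group-by
	// column becomes a constant within each group.
	g := table.GroupBy(t, "branch")
	for _, gid := range g.Tables() {
		fmt.Println(gid, g.Table(gid).Len(), "rows")
	}

	// Flatten concatenates all groups back into a single table.
	fmt.Println(table.Flatten(g).Len(), "rows total")
}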
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import "reflect"
// Head returns the first n rows in each Table of g.
func Head(g Grouping, n int) Grouping {
return headTail(g, n, false)
}
// Tail returns the last n rows in each Table of g.
func Tail(g Grouping, n int) Grouping {
return headTail(g, n, true)
}
func headTail(g Grouping, n int, tail bool) Grouping {
return MapTables(g, func(_ GroupID, t *Table) *Table {
if t.Len() <= n {
return t
}
var nt Builder
for _, col := range t.Columns() {
if cv, ok := t.Const(col); ok {
nt.AddConst(col, cv)
continue
}
cv := reflect.ValueOf(t.Column(col))
if tail {
cv = cv.Slice(t.Len()-n, t.Len())
} else {
cv = cv.Slice(0, n)
}
nt.Add(col, cv.Interface())
}
return nt.Done()
})
}
// HeadTables returns the first n tables in g.
func HeadTables(g Grouping, n int) Grouping {
return headTailTables(g, n, false)
}
// TailTables returns the last n tables in g.
func TailTables(g Grouping, n int) Grouping {
return headTailTables(g, n, true)
}
func headTailTables(g Grouping, n int, tail bool) Grouping {
tables := g.Tables()
if len(tables) <= n {
return g
} else if tail {
tables = tables[len(tables)-n:]
} else {
tables = tables[:n]
}
var ng GroupingBuilder
for _, gid := range tables {
ng.Add(gid, g.Table(gid))
}
return ng.Done()
}
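Head and Tail in a quick sketch (illustrative, not part of this change).

package main

import "github.com/aclements/go-gg/table"

func main() {
	var b table.Builder
	b.Add("commit", []int{1, 2, 3, 4, 5})
	b.Add("ns/op", []float64{100, 101, 99, 102, 98})
	t := b.Done()

	// First two and last two rows of each group (here only the
	// root group exists).
	table.Print(table.Head(t, 2))
	table.Print(table.Tail(t, 2))
}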
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"reflect"
"github.com/aclements/go-gg/generic/slice"
)
// Join joins g1 and g2 on tables with identical group IDs where col1
// in g1 equals col2 in g2. It maintains the group order of g1, except
// that groups that aren't in g2 are removed, and maintains the row
// order of g1, followed by the row order of g2.
//
// TODO: Support join on more than one column.
func Join(g1 Grouping, col1 string, g2 Grouping, col2 string) Grouping {
var ng GroupingBuilder
for _, gid := range g1.Tables() {
t1, t2 := g1.Table(gid), g2.Table(gid)
if t2 == nil {
continue
}
// TODO: Optimize for cases where col1 and/or col2 are
// constant.
// Index col2 in t2.
ridx := make(map[interface{}][]int)
rv := reflect.ValueOf(t2.MustColumn(col2))
for i, l := 0, rv.Len(); i < l; i++ {
v := rv.Index(i).Interface()
ridx[v] = append(ridx[v], i)
}
// For each row in t1, find the matching rows in col2
// and build up the row indexes for t1 and t2.
idx1, idx2 := []int{}, []int{}
lv := reflect.ValueOf(t1.MustColumn(col1))
for i, l := 0, lv.Len(); i < l; i++ {
r := ridx[lv.Index(i).Interface()]
for range r {
idx1 = append(idx1, i)
}
idx2 = append(idx2, r...)
}
// Build the joined table.
var nt Builder
for _, col := range t1.Columns() {
if cv, ok := t1.Const(col); ok {
nt.AddConst(col, cv)
continue
}
nt.Add(col, slice.Select(t1.Column(col), idx1))
}
for _, col := range t2.Columns() {
// Often the join column is the same in both
// and we can skip it because we added it from
// the first table.
if col == col1 && col == col2 {
continue
}
if cv, ok := t2.Const(col); ok {
nt.AddConst(col, cv)
continue
}
nt.Add(col, slice.Select(t2.Column(col), idx2))
}
ng.Add(gid, nt.Done())
}
return ng.Done()
}
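An illustrative Join sketch, not part of this change: per-run results joined with per-commit metadata on a shared "commit" column. The data is invented.

package main

import "github.com/aclements/go-gg/table"

func main() {
	var runs, meta table.Builder
	runs.Add("commit", []string{"c1", "c2", "c2"})
	runs.Add("ns/op", []float64{120, 118, 119})
	meta.Add("commit", []string{"c1", "c2"})
	meta.Add("date", []string{"2017-01-01", "2017-01-02"})

	// Each run row is paired with the metadata row that has the
	// same "commit"; the shared join column appears only once.
	table.Print(table.Join(runs.Done(), "commit", meta.Done(), "commit"))
}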
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"fmt"
"reflect"
"github.com/aclements/go-gg/generic"
)
// MapTables applies f to each Table in g and returns a new Grouping
// with the same group structure as g, but with the Tables returned by
// f.
func MapTables(g Grouping, f func(gid GroupID, table *Table) *Table) Grouping {
var out GroupingBuilder
for _, gid := range g.Tables() {
out.Add(gid, f(gid, g.Table(gid)))
}
return out.Done()
}
// MapCols applies f to a set of input columns to construct a set of
// new output columns.
//
// For each Table in g, MapCols calls f(in[0], in[1], ..., out[0],
// out[1], ...) where in[i] is column incols[i]. f should process the
// values in the input column slices and fill the output column slices
// out[j] accordingly. MapCols returns a new Grouping that adds each
// outcols[j] bound to out[j].
//
// If all of the input columns are constant for a given table, MapCols
// will call f with all slices of length 1. The input column slices
// will contain the constant column values and MapCols will bind each
// output column value out[i][0] as a constant.
func MapCols(g Grouping, f interface{}, incols ...string) func(outcols ...string) Grouping {
return func(outcols ...string) Grouping {
fv := reflect.ValueOf(f)
if fv.Kind() != reflect.Func {
panic(&generic.TypeError{fv.Type(), nil, "must be a function"})
}
ft := fv.Type()
if ft.NumIn() != len(incols)+len(outcols) {
panic(&generic.TypeError{ft, nil, fmt.Sprintf("has the wrong number of arguments; expected %d", len(incols)+len(outcols))})
}
if ft.NumOut() != 0 {
panic(&generic.TypeError{ft, nil, "has the wrong number of results; expected 0"})
}
// Create output column slices.
totalRows := 0
for _, gid := range g.Tables() {
t := g.Table(gid)
colloop:
for _, incol := range incols {
if _, ok := t.Const(incol); !ok {
totalRows += g.Table(gid).Len()
break colloop
}
}
}
ocols := make([]reflect.Value, len(outcols))
for i := range ocols {
ocols[i] = reflect.MakeSlice(ft.In(i+len(incols)), totalRows, totalRows)
}
// Apply f to each group.
var out GroupingBuilder
args := make([]reflect.Value, len(incols)+len(outcols))
opos := 0
for _, gid := range g.Tables() {
t := g.Table(gid)
// Are all inputs constants?
allConst := true
for _, incol := range incols {
if _, ok := t.Const(incol); !ok {
allConst = false
break
}
}
if allConst {
for i, incol := range incols {
cv, _ := t.Const(incol)
args[i] = reflect.MakeSlice(ColType(t, incol), 1, 1)
args[i].Index(0).Set(reflect.ValueOf(cv))
}
for i, ocol := range ocols {
args[i+len(incols)] = reflect.MakeSlice(ocol.Type(), 1, 1)
}
fv.Call(args)
tb := NewBuilder(t)
for i, outcol := range outcols {
tb.AddConst(outcol, args[i+len(incols)].Index(0).Interface())
}
out.Add(gid, tb.Done())
continue
}
// Prepare arguments.
for i, incol := range incols {
args[i] = reflect.ValueOf(t.MustColumn(incol))
}
for i, ocol := range ocols {
args[i+len(incols)] = ocol.Slice(opos, opos+t.Len())
}
opos += t.Len()
// Call f.
fv.Call(args)
// Add output columns.
tb := NewBuilder(t)
for i, outcol := range outcols {
tb.Add(outcol, args[i+len(incols)].Interface())
}
out.Add(gid, tb.Done())
}
return out.Done()
}
}
// Rename returns g with column 'from' renamed to 'to'. The column
// retains its position.
func Rename(g Grouping, from, to string) Grouping {
return MapTables(g, func(_ GroupID, t *Table) *Table {
t.MustColumn(from)
var nt Builder
for _, col := range t.Columns() {
if col == to {
continue
}
ncol := col
if col == from {
ncol = to
}
if cv, ok := t.Const(col); ok {
nt.AddConst(ncol, cv)
} else {
nt.Add(ncol, t.Column(col))
}
}
return nt.Done()
})
}
// Remove returns g with column 'col' removed.
func Remove(g Grouping, col string) Grouping {
return MapTables(g, func(_ GroupID, t *Table) *Table {
t.MustColumn(col)
var nt Builder
for _, col2 := range t.Columns() {
if col == col2 {
continue
}
if cv, ok := t.Const(col2); ok {
nt.AddConst(col2, cv)
} else {
nt.Add(col2, t.Column(col2))
}
}
return nt.Done()
})
}
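MapCols is easier to see than to describe. The sketch below is illustrative only (not part of this change): it derives one output column from one input column, then renames it.

package main

import "github.com/aclements/go-gg/table"

func main() {
	var b table.Builder
	b.Add("ns/op", []float64{120, 90, 150})
	t := b.Done()

	// f receives the input column slice followed by a
	// pre-allocated output slice and fills the output in place.
	g := table.MapCols(t, func(ns, us []float64) {
		for i, v := range ns {
			us[i] = v / 1000
		}
	}, "ns/op")("us/op")

	table.Print(table.Rename(g, "us/op", "µs/op"))
}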
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"reflect"
"strconv"
"github.com/aclements/go-gg/generic"
)
// TableFromStructs converts a []T where T is a struct to a Table
// where the columns of the table correspond to T's exported fields.
func TableFromStructs(structs Slice) *Table {
s := reflectSlice(structs)
st := s.Type()
if st.Elem().Kind() != reflect.Struct {
panic(&generic.TypeError{st, nil, "is not a slice of struct"})
}
var t Builder
rows := s.Len()
var rec func(reflect.Type, []int)
rec = func(typ reflect.Type, index []int) {
for fn := 0; fn < typ.NumField(); fn++ {
field := typ.Field(fn)
if field.PkgPath != "" {
continue
}
oldIndexLen := len(index)
index = append(index, field.Index...)
if field.Anonymous {
rec(field.Type, index)
} else {
col := reflect.MakeSlice(reflect.SliceOf(field.Type), rows, rows)
for i := 0; i < rows; i++ {
col.Index(i).Set(s.Index(i).FieldByIndex(index))
}
t.Add(field.Name, col.Interface())
}
index = index[:oldIndexLen]
}
}
rec(st.Elem(), []int{})
return t.Done()
}
// TableFromStrings converts a [][]string to a Table. This is intended
// for processing external data, such as from CSV files. If coerce is
// true, TableFromStrings will convert columns to []int or []float
// when every string in that column is accepted by strconv.ParseInt or
// strconv.ParseFloat, respectively.
func TableFromStrings(cols []string, rows [][]string, coerce bool) *Table {
var t Builder
for i, col := range cols {
slice := make([]string, len(rows))
for j, row := range rows {
slice[j] = row[i]
}
var colData interface{} = slice
switch {
case coerce && len(slice) > 0:
// Try []int.
var err error
for _, str := range slice {
_, err = strconv.ParseInt(str, 10, 0)
if err != nil {
break
}
}
if err == nil {
nslice := make([]int, len(rows))
for i, str := range slice {
v, _ := strconv.ParseInt(str, 10, 0)
nslice[i] = int(v)
}
colData = nslice
break
}
// Try []float64. This must be done after
// []int. It's also more expensive.
for _, str := range slice {
_, err = strconv.ParseFloat(str, 64)
if err != nil {
break
}
}
if err == nil {
nslice := make([]float64, len(rows))
for i, str := range slice {
nslice[i], _ = strconv.ParseFloat(str, 64)
}
colData = nslice
break
}
}
t.Add(col, colData)
}
return t.Done()
}
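A small TableFromStrings sketch (not part of this change) showing the coercion rules on made-up CSV-like input.

package main

import "github.com/aclements/go-gg/table"

func main() {
	// With coerce set, "commit" becomes []int and "ns/op" becomes
	// []float64; "name" stays []string.
	cols := []string{"name", "commit", "ns/op"}
	rows := [][]string{
		{"BenchmarkA", "1", "120.5"},
		{"BenchmarkA", "2", "118.2"},
	}
	table.Print(table.TableFromStrings(cols, rows, true))
}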
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"reflect"
"github.com/aclements/go-gg/generic"
)
// Pivot converts rows of g into columns. label and value must name
// columns in g, and the label column must have type []string. Pivot
// returns a Grouping with a new column named after each distinct
// value in the label column, where the values in that column
// correspond to the values from the value column. All other columns
// (besides label and value) are copied to the output. If, for a given
// column in an output row, no input row has that column in the label
// column, the output cell will have the zero value for its type.
func Pivot(g Grouping, label, value string) Grouping {
// Find all unique values of label. These are the new columns.
labels := []string{}
lset := map[string]int{}
for _, gid := range g.Tables() {
for _, l := range g.Table(gid).MustColumn(label).([]string) {
if _, ok := lset[l]; !ok {
lset[l] = len(lset)
labels = append(labels, l)
}
}
}
// Get all columns that are not label or value.
groupCols := []string{}
for _, col := range g.Columns() {
if col != label && col != value {
groupCols = append(groupCols, col)
}
}
return MapTables(g, func(_ GroupID, t *Table) *Table {
var nt Builder
// Group by all other columns. Each group in gg
// becomes an output row.
gg := GroupBy(t, groupCols...)
// Copy grouped-by values.
for _, groupCol := range groupCols {
cv := reflect.MakeSlice(reflect.TypeOf(t.Column(groupCol)), len(gg.Tables()), len(gg.Tables()))
for i, gid := range gg.Tables() {
sub := gg.Table(gid)
cv.Index(i).Set(reflect.ValueOf(sub.Column(groupCol)).Index(0))
}
nt.Add(groupCol, cv.Interface())
}
// Initialize new columns.
newCols := make([]reflect.Value, len(lset))
vt := reflect.TypeOf(t.MustColumn(value))
for i := range newCols {
newCols[i] = reflect.MakeSlice(vt, len(gg.Tables()), len(gg.Tables()))
}
// Fill in new columns.
for i, gid := range gg.Tables() {
sub := gg.Table(gid)
vcol := reflect.ValueOf(sub.MustColumn(value))
for j, l := range sub.MustColumn(label).([]string) {
val := vcol.Index(j)
newCols[lset[l]].Index(i).Set(val)
}
}
// Add new columns to output table.
for i, newCol := range newCols {
nt.Add(labels[i], newCol.Interface())
}
return nt.Done()
})
}
// Unpivot converts columns of g into rows. The returned Grouping
// consists of the columns of g *not* listed in cols, plus two columns
// named by the label and value arguments. For each input row in g,
// the returned Grouping will have len(cols) output rows. The i'th
// such output row corresponds to column cols[i] in the input row. The
// label column will contain the name of the unpivoted column,
// cols[i], and the value column will contain that column's value from
// the input row. The values of all other columns in the input row
// will be repeated across the output rows. All columns in cols must
// have the same type.
func Unpivot(g Grouping, label, value string, cols ...string) Grouping {
if len(cols) == 0 {
panic("Unpivot requires at least 1 column")
}
colSet := map[string]bool{}
for _, col := range cols {
colSet[col] = true
}
return MapTables(g, func(_ GroupID, t *Table) *Table {
var nt Builder
// Repeat all other columns len(cols) times.
ntlen := t.Len() * len(cols)
for _, name := range t.Columns() {
if colSet[name] || name == label || name == value {
continue
}
col := reflect.ValueOf(t.Column(name))
ncol := reflect.MakeSlice(col.Type(), ntlen, ntlen)
for i, l := 0, col.Len(); i < l; i++ {
v := col.Index(i)
for j := range cols {
ncol.Index(i*len(cols) + j).Set(v)
}
}
nt.Add(name, ncol.Interface())
}
// Get input columns.
var vt reflect.Type
colvs := make([]reflect.Value, len(cols))
for i, col := range cols {
colvs[i] = reflect.ValueOf(t.MustColumn(col))
if i == 0 {
vt = colvs[i].Type()
} else if vt != colvs[i].Type() {
panic(&generic.TypeError{vt, colvs[i].Type(), "; cannot Unpivot columns with different types"})
}
}
// Create label and value columns.
lcol := make([]string, 0, ntlen)
vcol := reflect.MakeSlice(vt, ntlen, ntlen)
for i := 0; i < t.Len(); i++ {
lcol = append(lcol, cols...)
for j, colv := range colvs {
vcol.Index(i*len(cols) + j).Set(colv.Index(i))
}
}
nt.Add(label, lcol).Add(value, vcol.Interface())
return nt.Done()
})
}
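A Pivot/Unpivot round trip (illustrative only, not part of this change). The label column is []string, as Pivot requires, and both unpivoted columns share a type, as Unpivot requires.

package main

import "github.com/aclements/go-gg/table"

func main() {
	// Long form: one row per (benchmark, metric) pair.
	var b table.Builder
	b.Add("name", []string{"BenchmarkA", "BenchmarkA", "BenchmarkB", "BenchmarkB"})
	b.Add("metric", []string{"ns/op", "allocs/op", "ns/op", "allocs/op"})
	b.Add("value", []float64{120, 4, 300, 7})
	long := b.Done()

	// Wide form: one row per benchmark with "ns/op" and
	// "allocs/op" columns.
	wide := table.Pivot(long, "metric", "value")
	table.Print(wide)

	// Unpivot converts it back to long form.
	table.Print(table.Unpivot(wide, "metric", "value", "ns/op", "allocs/op"))
}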
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"fmt"
"io"
"os"
"reflect"
"strings"
)
// TODO: Have a format struct with options for things like column
// separator and header separator. Provide some default ones for,
// e.g., Markdown, CSV, TSV, and such. Make top-level Print and Fprint
// call methods in some default format.
// Print(...) is shorthand for Fprint(os.Stdout, ...).
func Print(g Grouping, formats ...string) error {
return Fprint(os.Stdout, g, formats...)
}
// Fprint prints Grouping g to w. formats[i] specifies a fmt-style
// format string for column i. If there are more columns than formats,
// remaining columns are formatted with %v (in particular, formats may
// be omitted entirely to use %v for all columns). Numeric columns are
// right aligned; all other column types are left aligned.
func Fprint(w io.Writer, g Grouping, formats ...string) error {
if g.Columns() == nil {
return nil
}
// Convert each column to strings.
ss := make([][]string, len(g.Columns()))
rowFmts := make([]string, len(g.Columns()))
for i, col := range g.Columns() {
format := "%v"
if i < len(formats) {
format = formats[i]
}
// Format column.
var valKind reflect.Kind
ss[i] = []string{col}
for _, gid := range g.Tables() {
seq := reflect.ValueOf(g.Table(gid).Column(col))
for row := 0; row < seq.Len(); row++ {
str := fmt.Sprintf(format, seq.Index(row).Interface())
ss[i] = append(ss[i], str)
}
if valKind == reflect.Invalid {
valKind = seq.Type().Elem().Kind()
}
}
// Find column width.
width := 0
for _, s := range ss[i] {
if len(s) > width {
width = len(s)
}
}
// If it's a numeric column, right align.
//
// TODO: Even better would be to decimal align, though
// that may require some understanding of the format;
// or we could only do it for the default format.
switch valKind {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64:
width = -width
}
if i == len(g.Columns())-1 && width > 0 {
// Don't pad the last column.
rowFmts[i] = "%s"
} else {
rowFmts[i] = fmt.Sprintf("%%%ds", -width)
}
}
// Compute group headers.
groups := []GroupID{}
groupPos := []int{}
lastPos := 1
for _, gid := range g.Tables() {
groups = append(groups, gid)
groupPos = append(groupPos, lastPos)
lastPos += g.Table(gid).Len()
}
if len(groups) == 1 && groups[0] == RootGroupID {
groups, groupPos = nil, nil
}
// Print rows.
rowFmt := strings.Join(rowFmts, " ") + "\n"
rowBuf := make([]interface{}, len(rowFmts))
for row := 0; row < len(ss[0]); row++ {
if len(groupPos) > 0 && row == groupPos[0] {
_, err := fmt.Fprintf(w, "-- %s\n", groups[0])
if err != nil {
return err
}
groups, groupPos = groups[1:], groupPos[1:]
}
for col := range rowBuf {
rowBuf[col] = ss[col][row]
}
_, err := fmt.Fprintf(w, rowFmt, rowBuf...)
if err != nil {
return err
}
}
return nil
}
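A short Fprint sketch (not part of this change) showing per-column format strings; remaining columns would fall back to %v.

package main

import (
	"os"

	"github.com/aclements/go-gg/table"
)

func main() {
	var b table.Builder
	b.Add("name", []string{"BenchmarkA", "BenchmarkB"})
	b.Add("ns/op", []float64{120.5, 300.25})
	t := b.Done()

	// "%s" formats the name column, "%.1f" the ns/op column.
	// Numeric columns are right-aligned.
	if err := table.Fprint(os.Stdout, t, "%s", "%.1f"); err != nil {
		panic(err)
	}
}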
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package table
import (
"sort"
"github.com/aclements/go-gg/generic/slice"
)
// SortBy sorts each group of g by the named columns. If a column's
// type implements sort.Interface, rows will be sorted according to
// that order. Otherwise, the values in the column must be naturally
// ordered (their types must be orderable by the Go specification). If
// neither is true, SortBy panics with a *generic.TypeError. If more
// than one column is given, SortBy sorts by the tuple of the columns;
// that is, if two values in the first column are equal, they are
// sorted by the second column, and so on.
func SortBy(g Grouping, cols ...string) Grouping {
// Sort each group.
sorters := make([]sort.Interface, len(cols))
return MapTables(g, func(_ GroupID, t *Table) *Table {
// Create sorters for each column.
sorters = sorters[:0]
for _, col := range cols {
if _, ok := t.Const(col); ok {
continue
}
seq := t.MustColumn(col)
sorter := slice.Sorter(seq)
if sort.IsSorted(sorter) {
continue
}
sorters = append(sorters, sorter)
}
if len(sorters) == 0 {
// Avoid shuffling everything by the identity
// permutation.
return t
}
// Generate an initial permutation sequence.
perm := make([]int, t.Len())
for i := range perm {
perm[i] = i
}
// Sort the permutation sequence.
sort.Stable(&permSort{perm, sorters})
// Permute all columns.
var nt Builder
for _, name := range t.Columns() {
if cv, ok := t.Const(name); ok {
nt.AddConst(name, cv)
continue
}
seq := t.Column(name)
seq = slice.Select(seq, perm)
nt.Add(name, seq)
}
return nt.Done()
})
}
type permSort struct {
perm []int
keys []sort.Interface
}
func (s *permSort) Len() int {
return len(s.perm)
}
func (s *permSort) Less(i, j int) bool {
// Since there's no way to ask about equality, we have to do
// extra work for all of the keys except the last.
for _, key := range s.keys[:len(s.keys)-1] {
if key.Less(s.perm[i], s.perm[j]) {
return true
} else if key.Less(s.perm[j], s.perm[i]) {
return false
}
}
return s.keys[len(s.keys)-1].Less(s.perm[i], s.perm[j])
}
func (s *permSort) Swap(i, j int) {
s.perm[i], s.perm[j] = s.perm[j], s.perm[i]
}
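SortBy in brief (illustrative, not part of this change): sort by name, breaking ties by ns/op; the underlying sort is stable.

package main

import "github.com/aclements/go-gg/table"

func main() {
	var b table.Builder
	b.Add("name", []string{"BenchmarkB", "BenchmarkA", "BenchmarkB"})
	b.Add("ns/op", []float64{300, 120, 290})
	t := b.Done()

	table.Print(table.SortBy(t, "name", "ns/op"))
}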
Copyright (c) 2015 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mathx
import "math"
func lgamma(x float64) float64 {
y, _ := math.Lgamma(x)
return y
}
// Beta returns the value of the complete beta function B(a, b).
func Beta(a, b float64) float64 {
// B(x,y) = Γ(x)Γ(y) / Γ(x+y)
return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b))
}
// BetaInc returns the value of the regularized incomplete beta
// function Iₓ(a, b).
//
// This is not to be confused with the "incomplete beta function",
// which can be computed as BetaInc(x, a, b)*Beta(a, b).
//
// If x < 0 or x > 1, returns NaN.
func BetaInc(x, a, b float64) float64 {
// Based on Numerical Recipes in C, section 6.4. This uses the
// continued fraction definition of I:
//
// (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...))))))
//
// where B(a,b) is the beta function and
//
// d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1))
// d_{2m} = m(b-m)x/((a+2m-1)(a+2m))
if x < 0 || x > 1 {
return math.NaN()
}
bt := 0.0
if 0 < x && x < 1 {
// Compute the coefficient before the continued
// fraction.
bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) +
a*math.Log(x) + b*math.Log(1-x))
}
if x < (a+1)/(a+b+2) {
// Compute continued fraction directly.
return bt * betacf(x, a, b) / a
} else {
// Compute continued fraction after symmetry transform.
return 1 - bt*betacf(1-x, b, a)/b
}
}
// betacf is the continued fraction component of the regularized
// incomplete beta function Iₓ(a, b).
func betacf(x, a, b float64) float64 {
const maxIterations = 200
const epsilon = 3e-14
raiseZero := func(z float64) float64 {
if math.Abs(z) < math.SmallestNonzeroFloat64 {
return math.SmallestNonzeroFloat64
}
return z
}
c := 1.0
d := 1 / raiseZero(1-(a+b)*x/(a+1))
h := d
for m := 1; m <= maxIterations; m++ {
mf := float64(m)
// Even step of the recurrence.
numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf))
d = 1 / raiseZero(1+numer*d)
c = raiseZero(1 + numer/c)
h *= d * c
// Odd step of the recurrence.
numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1))
d = 1 / raiseZero(1+numer*d)
c = raiseZero(1 + numer/c)
hfac := d * c
h *= hfac
if math.Abs(hfac-1) < epsilon {
return h
}
}
panic("betainc: a or b too big; failed to converge")
}
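Two quick checks of Beta and BetaInc against known identities (illustrative, not part of this change).

package main

import (
	"fmt"

	"github.com/aclements/go-moremath/mathx"
)

func main() {
	// Iₓ(a, b) is the CDF of a Beta(a, b) distribution; for
	// a = b = 1 it is the uniform CDF, so BetaInc(x, 1, 1) == x.
	fmt.Println(mathx.BetaInc(0.25, 1, 1)) // ≈ 0.25

	// B(2, 3) = Γ(2)Γ(3)/Γ(5) = 1·2/24 = 1/12.
	fmt.Println(mathx.Beta(2, 3)) // ≈ 0.0833
}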
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mathx
import "math"
const smallFactLimit = 20 // 20! => 62 bits
var smallFact [smallFactLimit + 1]int64
func init() {
smallFact[0] = 1
fact := int64(1)
for n := int64(1); n <= smallFactLimit; n++ {
fact *= n
smallFact[n] = fact
}
}
// Choose returns the binomial coefficient of n and k.
func Choose(n, k int) float64 {
if k == 0 || k == n {
return 1
}
if k < 0 || n < k {
return 0
}
if n <= smallFactLimit { // Implies k <= smallFactLimit
// It's faster to do several integer multiplications
// than it is to do an extra integer division.
// Remarkably, this is also faster than pre-computing
// Pascal's triangle (presumably because this is very
// cache efficient).
numer := int64(1)
for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ {
numer *= n1
}
denom := smallFact[k]
return float64(numer / denom)
}
return math.Exp(lchoose(n, k))
}
// Lchoose returns math.Log(Choose(n, k)).
func Lchoose(n, k int) float64 {
if k == 0 || k == n {
return 0
}
if k < 0 || n < k {
return math.NaN()
}
return lchoose(n, k)
}
func lchoose(n, k int) float64 {
a, _ := math.Lgamma(float64(n + 1))
b, _ := math.Lgamma(float64(k + 1))
c, _ := math.Lgamma(float64(n - k + 1))
return a - b - c
}
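A Choose/Lchoose sketch (not part of this change), including the log form for arguments whose coefficient would lose precision as a plain product.

package main

import (
	"fmt"
	"math"

	"github.com/aclements/go-moremath/mathx"
)

func main() {
	// Small arguments take the exact integer path.
	fmt.Println(mathx.Choose(5, 2)) // 10

	// Lchoose works in log space; exponentiate to recover the
	// coefficient itself.
	fmt.Println(mathx.Lchoose(5, 2))            // ln(10) ≈ 2.3026
	fmt.Println(math.Exp(mathx.Lchoose(52, 5))) // ≈ 2598960
}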
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mathx
import "math"
// GammaInc returns the value of the incomplete gamma function (also
// known as the regularized gamma function):
//
// P(a, x) = 1 / Γ(a) * ∫₀ˣ exp(-t) t**(a-1) dt
func GammaInc(a, x float64) float64 {
// Based on Numerical Recipes in C, section 6.2.
if a <= 0 || x < 0 || math.IsNaN(a) || math.IsNaN(x) {
return math.NaN()
}
if x < a+1 {
// Use the series representation, which converges more
// rapidly in this range.
return gammaIncSeries(a, x)
} else {
// Use the continued fraction representation.
return 1 - gammaIncCF(a, x)
}
}
// GammaIncComp returns the complement of the incomplete gamma
// function 1 - GammaInc(a, x). This is more numerically stable for
// values near 0.
func GammaIncComp(a, x float64) float64 {
if a <= 0 || x < 0 || math.IsNaN(a) || math.IsNaN(x) {
return math.NaN()
}
if x < a+1 {
return 1 - gammaIncSeries(a, x)
} else {
return gammaIncCF(a, x)
}
}
func gammaIncSeries(a, x float64) float64 {
const maxIterations = 200
const epsilon = 3e-14
if x == 0 {
return 0
}
ap := a
del := 1 / a
sum := del
for n := 0; n < maxIterations; n++ {
ap++
del *= x / ap
sum += del
if math.Abs(del) < math.Abs(sum)*epsilon {
return sum * math.Exp(-x+a*math.Log(x)-lgamma(a))
}
}
panic("a too large; failed to converge")
}
func gammaIncCF(a, x float64) float64 {
const maxIterations = 200
const epsilon = 3e-14
raiseZero := func(z float64) float64 {
if math.Abs(z) < math.SmallestNonzeroFloat64 {
return math.SmallestNonzeroFloat64
}
return z
}
b := x + 1 - a
c := math.MaxFloat64
d := 1 / b
h := d
for i := 1; i <= maxIterations; i++ {
an := -float64(i) * (float64(i) - a)
b += 2
d = raiseZero(an*d + b)
c = raiseZero(b + an/c)
d = 1 / d
del := d * c
h *= del
if math.Abs(del-1) < epsilon {
return math.Exp(-x+a*math.Log(x)-lgamma(a)) * h
}
}
panic("a too large; failed to converge")
}
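A GammaInc sanity check against the exponential CDF (illustrative, not part of this change).

package main

import (
	"fmt"
	"math"

	"github.com/aclements/go-moremath/mathx"
)

func main() {
	// For a = 1 the regularized incomplete gamma function is the
	// exponential CDF: P(1, x) = 1 - exp(-x).
	x := 2.0
	fmt.Println(mathx.GammaInc(1, x))     // ≈ 0.8647
	fmt.Println(1 - math.Exp(-x))         // same value
	fmt.Println(mathx.GammaIncComp(1, x)) // ≈ 0.1353
}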
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package mathx implements special functions not provided by the
// standard math package.
package mathx // import "github.com/aclements/go-moremath/mathx"
import "math"
var nan = math.NaN()
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mathx
// Sign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0.
// If x is NaN, it returns NaN.
func Sign(x float64) float64 {
if x == 0 {
return 0
} else if x < 0 {
return -1
} else if x > 0 {
return 1
}
return nan
}
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stats
// Miscellaneous helper algorithms
import (
"fmt"
"github.com/aclements/go-moremath/mathx"
)
func maxint(a, b int) int {
if a > b {
return a
}
return b
}
func minint(a, b int) int {
if a < b {
return a
}
return b
}
func sumint(xs []int) int {
sum := 0
for _, x := range xs {
sum += x
}
return sum
}
// bisect returns an x in [low, high] such that |f(x)| <= tolerance
// using the bisection method.
//
// f(low) and f(high) must have opposite signs.
//
// If f does not have a root in this interval (e.g., it is
// discontinuous), this returns the x of the apparent discontinuity
// and false.
func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) {
flow, fhigh := f(low), f(high)
if -tolerance <= flow && flow <= tolerance {
return low, true
}
if -tolerance <= fhigh && fhigh <= tolerance {
return high, true
}
if mathx.Sign(flow) == mathx.Sign(fhigh) {
panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh))
}
for {
mid := (high + low) / 2
fmid := f(mid)
if -tolerance <= fmid && fmid <= tolerance {
return mid, true
}
if mid == high || mid == low {
return mid, false
}
if mathx.Sign(fmid) == mathx.Sign(flow) {
low = mid
flow = fmid
} else {
high = mid
fhigh = fmid
}
}
}
// bisectBool implements the bisection method on a boolean function.
// It returns x1, x2 ∈ [low, high], x1 < x2 such that f(x1) != f(x2)
// and x2 - x1 <= xtol.
//
// If f(low) == f(high), it panics.
func bisectBool(f func(float64) bool, low, high, xtol float64) (x1, x2 float64) {
flow, fhigh := f(low), f(high)
if flow == fhigh {
panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%v f(%g)=%v", low, flow, high, fhigh))
}
for {
if high-low <= xtol {
return low, high
}
mid := (high + low) / 2
if mid == high || mid == low {
return low, high
}
fmid := f(mid)
if fmid == flow {
low = mid
flow = fmid
} else {
high = mid
fhigh = fmid
}
}
}
// series returns the sum of the series f(0), f(1), ...
//
// This implementation is fast, but subject to round-off error.
func series(f func(float64) float64) float64 {
y, yp := 0.0, 1.0
for n := 0.0; y != yp; n++ {
yp = y
y += f(n)
}
return y
}
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stats
// DeltaDist is the Dirac delta function, centered at T, with total
// area 1.
//
// The CDF of the Dirac delta function is the Heaviside step function,
// centered at T. Specifically, CDF(T) == 1.
type DeltaDist struct {
T float64
}
func (d DeltaDist) PDF(x float64) float64 {
if x == d.T {
return inf
}
return 0
}
func (d DeltaDist) pdfEach(xs []float64) []float64 {
res := make([]float64, len(xs))
for i, x := range xs {
if x == d.T {
res[i] = inf
}
}
return res
}
func (d DeltaDist) CDF(x float64) float64 {
if x >= d.T {
return 1
}
return 0
}
func (d DeltaDist) cdfEach(xs []float64) []float64 {
res := make([]float64, len(xs))
for i, x := range xs {
res[i] = d.CDF(x)
}
return res
}
func (d DeltaDist) InvCDF(y float64) float64 {
if y < 0 || y > 1 {
return nan
}
return d.T
}
func (d DeltaDist) Bounds() (float64, float64) {
return d.T - 1, d.T + 1
}
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stats
import "math"
// TODO: Implement histograms on top of scales.
type Histogram interface {
// Add adds a sample with value x to histogram h.
Add(x float64)
// Counts returns the number of samples less than the lowest
// bin, a slice of the number of samples in each bin,
// and the number of samples greater than the highest bin.
Counts() (under uint, counts []uint, over uint)
// BinToValue returns the value that would appear at the given
// bin index.
//
// For integral values of bin, BinToValue returns the lower
// bound of bin. That is, a sample value x will be in bin if
// bin is integral and
//
// BinToValue(bin) <= x < BinToValue(bin + 1)
//
// For non-integral values of bin, BinToValue interpolates
// between the lower and upper bounds of math.Floor(bin).
//
// BinToValue is undefined if bin > 1 + the number of bins.
BinToValue(bin float64) float64
}
// HistogramQuantile returns the x such that n*q samples in hist are
// <= x, assuming values are distributed within each bin according to
// hist's distribution.
//
// If the q'th sample falls below the lowest bin or above the highest
// bin, returns NaN.
func HistogramQuantile(hist Histogram, q float64) float64 {
under, counts, over := hist.Counts()
total := under + over
for _, count := range counts {
total += count
}
goal := uint(float64(total) * q)
if goal <= under || goal > total-over {
return math.NaN()
}
for bin, count := range counts {
if count > goal {
return hist.BinToValue(float64(bin) + float64(goal)/float64(count))
}
goal -= count
}
panic("goal count not reached")
}
// HistogramIQR returns the interquartile range of the samples in
// hist.
func HistogramIQR(hist Histogram) float64 {
return HistogramQuantile(hist, 0.75) - HistogramQuantile(hist, 0.25)
}
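The Histogram interface above can be satisfied by a toy fixed-bin implementation; the sketch below (not part of this change) does that and feeds it to HistogramQuantile and HistogramIQR. The stats import path is inferred from the sibling mathx path, and the sample values are invented.

package main

import (
	"fmt"

	"github.com/aclements/go-moremath/stats"
)

// unitHist is a toy stats.Histogram with unit-width bins
// [0,1), [1,2), ..., [len(bins)-1, len(bins)).
type unitHist struct {
	bins        []uint
	under, over uint
}

func (h *unitHist) Add(x float64) {
	switch {
	case x < 0:
		h.under++
	case x >= float64(len(h.bins)):
		h.over++
	default:
		h.bins[int(x)]++
	}
}

func (h *unitHist) Counts() (uint, []uint, uint) { return h.under, h.bins, h.over }

// BinToValue is the identity because bin i covers [i, i+1).
func (h *unitHist) BinToValue(bin float64) float64 { return bin }

func main() {
	h := &unitHist{bins: make([]uint, 10)}
	for _, x := range []float64{1.2, 2.1, 2.7, 3.3, 5.5, 8.9} {
		h.Add(x)
	}
	fmt.Println(stats.HistogramQuantile(h, 0.5)) // median estimate
	fmt.Println(stats.HistogramIQR(h))           // interquartile range
}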
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stats
import (
"math"
"github.com/aclements/go-moremath/mathx"
)
// HypergeometicDist is a hypergeometric distribution.
type HypergeometicDist struct {
// N is the size of the population. N >= 0.
N int
// K is the number of successes in the population. 0 <= K <= N.
K int
// Draws is the number of draws from the population. This is
// usually written "n", but is called Draws here because of
// limitations on Go identifier naming. 0 <= Draws <= N.
Draws int
}
// PMF is the probability of getting exactly int(k) successes in
// d.Draws draws without replacement from a population of size d.N
// that contains exactly d.K successes.
func (d HypergeometicDist) PMF(k float64) float64 {
ki := int(math.Floor(k))
l, h := d.bounds()
if ki < l || ki > h {
return 0
}
return d.pmf(ki)
}
func (d HypergeometicDist) pmf(k int) float64 {
return math.Exp(mathx.Lchoose(d.K, k) + mathx.Lchoose(d.N-d.K, d.Draws-k) - mathx.Lchoose(d.N, d.Draws))
}
// CDF is the probability of getting int(k) or fewer successes in
// d.Draws draws without replacement from a population of size d.N
// that contains exactly d.K successes.
func (d HypergeometicDist) CDF(k float64) float64 {
// Based on Klotz, A Computational Approach to Statistics.
ki := int(math.Floor(k))
l, h := d.bounds()
if ki < l {
return 0
} else if ki >= h {
return 1
}
// Use symmetry to compute the smaller sum.
flip := false
if ki > (d.Draws+1)/(d.N+1)*(d.K+1) {
flip = true
ki = d.K - ki - 1
d.Draws = d.N - d.Draws
}
p := d.pmf(ki) * d.sum(ki)
if flip {
p = 1 - p
}
return p
}
func (d HypergeometicDist) sum(k int) float64 {
const epsilon = 1e-14
sum, ak := 1.0, 1.0
L := maxint(0, d.Draws+d.K-d.N)
for dk := 1; dk <= k-L && ak/sum > epsilon; dk++ {
ak *= float64(1+k-dk) / float64(d.Draws-k+dk)
ak *= float64(d.N-d.K-d.Draws+k+1-dk) / float64(d.K-k+dk)
sum += ak
}
return sum
}
func (d HypergeometicDist) bounds() (int, int) {
return maxint(0, d.Draws+d.K-d.N), minint(d.Draws, d.K)
}
func (d HypergeometicDist) Bounds() (float64, float64) {
l, h := d.bounds()
return float64(l), float64(h)
}
func (d HypergeometicDist) Step() float64 {
return 1
}
func (d HypergeometicDist) Mean() float64 {
return float64(d.Draws*d.K) / float64(d.N)
}
func (d HypergeometicDist) Variance() float64 {
return float64(d.Draws*d.K*(d.N-d.K)*(d.N-d.Draws)) /
float64(d.N*d.N*(d.N-1))
}
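A brief HypergeometicDist sketch (not part of this change) for a made-up acceptance-sampling scenario; the stats import path is inferred from the sibling mathx path.

package main

import (
	"fmt"

	"github.com/aclements/go-moremath/stats"
)

func main() {
	// Draw 10 parts, without replacement, from a lot of 50 that
	// contains 5 defective parts.
	d := stats.HypergeometicDist{N: 50, K: 5, Draws: 10}

	fmt.Println(d.Mean()) // expected defects in the sample: 1
	fmt.Println(d.PMF(0)) // probability of drawing no defects
	fmt.Println(d.CDF(1)) // probability of at most one defect
}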
// generated by stringer -type=KDEBoundaryMethod; DO NOT EDIT
package stats
import "fmt"
const _KDEBoundaryMethod_name = "BoundaryReflect"
var _KDEBoundaryMethod_index = [...]uint8{0, 15}
func (i KDEBoundaryMethod) String() string {
if i < 0 || i+1 >= KDEBoundaryMethod(len(_KDEBoundaryMethod_index)) {
return fmt.Sprintf("KDEBoundaryMethod(%d)", i)
}
return _KDEBoundaryMethod_name[_KDEBoundaryMethod_index[i]:_KDEBoundaryMethod_index[i+1]]
}
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package vec provides functions for float64 vectors.
package vec // import "github.com/aclements/go-moremath/vec"