stats: rename percentile to quantile

aclements · aclements · commit 0ff62e0875ff · 2016-10-14T14:41:02.000-04:00
The percentile interfaces were wrong because they actually expected a
*quantile* in the range [0, 1], not a percentile in the range of [0,
100]. Fix this by renaming them all to "quantile".

Alternatively, I could have kept them named "percentile" and changed
the range, but that would have caused silent failures in callers.
diff --git a/cmd/dist/dist.go b/cmd/dist/dist.go
@@ -57,7 +57,7 @@ func main() {
 		if !ok {
 			label = fmt.Sprintf("%d%%ile", p)
 		}
-		fmt.Printf("%8s %.6g\n", label, s.Percentile(float64(p)/100))
+		fmt.Printf("%8s %.6g\n", label, s.Quantile(float64(p)/100))
 	}
 	fmt.Println()
 
diff --git a/stats/hist.go b/stats/hist.go
@@ -33,20 +33,20 @@ type Histogram interface {
 	BinToValue(bin float64) float64
 }
 
-// HistogramPercentile returns the x such that n*percentile samples in
-// hist are <= x, assuming values are distibuted within each bin
-// according to hist's distibution.
+// HistogramQuantile returns the x such that n*q samples in hist are
+// <= x, assuming values are distributed within each bin according to
+// hist's distribution.
 //
-// If the percentile'th sample falls below the lowest bin or above the
-// highest bin, returns NaN.
-func HistogramPercentile(hist Histogram, percentile float64) float64 {
+// If the q'th sample falls below the lowest bin or above the highest
+// bin, returns NaN.
+func HistogramQuantile(hist Histogram, q float64) float64 {
 	under, counts, over := hist.Counts()
 	total := under + over
 	for _, count := range counts {
 		total += count
 	}
 
-	goal := uint(float64(total) * percentile)
+	goal := uint(float64(total) * q)
 	if goal <= under || goal > total-over {
 		return math.NaN()
 	}
@@ -62,5 +62,5 @@ func HistogramPercentile(hist Histogram, percentile float64) float64 {
 // HistogramIQR returns the interquartile range of the samples in
 // hist.
 func HistogramIQR(hist Histogram) float64 {
-	return HistogramPercentile(hist, 0.75) - HistogramPercentile(hist, 0.25)
+	return HistogramQuantile(hist, 0.75) - HistogramQuantile(hist, 0.25)
 }
diff --git a/stats/kde.go b/stats/kde.go
@@ -78,9 +78,9 @@ func BandwidthSilverman(data interface {
 func BandwidthScott(data interface {
 	StdDev() float64
 	Weight() float64
-	Percentile(float64) float64
+	Quantile(float64) float64
 }) float64 {
-	iqr := data.Percentile(0.75) - data.Percentile(0.25)
+	iqr := data.Quantile(0.75) - data.Quantile(0.25)
 	hScale := 1.06 * math.Pow(data.Weight(), -1.0/5)
 	stdDev := data.StdDev()
 	if stdDev < iqr/1.349 {
diff --git a/stats/sample.go b/stats/sample.go
@@ -217,23 +217,25 @@ func (s Sample) StdDev() float64 {
 	panic("Weighted StdDev not implemented")
 }
 
-// Percentile returns the pctileth value from the Sample. This uses
-// interpolation method R8 from Hyndman and Fan (1996).
+// Quantile returns the sample value X at which q*weight of the sample
+// is <= X. This uses interpolation method R8 from Hyndman and Fan
+// (1996).
 //
-// pctile will be capped to the range [0, 1]. If len(xs) == 0 or all
+// q will be capped to the range [0, 1]. If len(s.Xs) == 0 or all
 // weights are 0, returns NaN.
 //
-// Percentile(0.5) is the median. Percentile(0.25) and
-// Percentile(0.75) are the first and third quartiles, respectively.
+// Quantile(0.5) is the median. Quantile(0.25) and Quantile(0.75) are
+// the first and third quartiles, respectively. Quantile(P/100) is the
+// P'th percentile.
 //
 // This is constant time if s.Sorted and s.Weights == nil.
-func (s Sample) Percentile(pctile float64) float64 {
+func (s Sample) Quantile(q float64) float64 {
 	if len(s.Xs) == 0 {
 		return math.NaN()
-	} else if pctile <= 0 {
+	} else if q <= 0 {
 		min, _ := s.Bounds()
 		return min
-	} else if pctile >= 1 {
+	} else if q >= 1 {
 		_, max := s.Bounds()
 		return max
 	}
@@ -245,8 +247,8 @@ func (s Sample) Percentile(pctile float64) float64 {
 
 	if s.Weights == nil {
 		N := float64(len(s.Xs))
-		//n := pctile * (N + 1) // R6
-		n := 1/3.0 + pctile*(N+1/3.0) // R8
+		//n := q * (N + 1) // R6
+		n := 1/3.0 + q*(N+1/3.0) // R8
 		kf, frac := math.Modf(n)
 		k := int(kf)
 		if k <= 0 {
@@ -258,7 +260,7 @@ func (s Sample) Percentile(pctile float64) float64 {
 	} else {
 		// TODO(austin): Implement interpolation
 
-		target := s.Weight() * pctile
+		target := s.Weight() * q
 
 		// TODO(austin) If we had cumulative weights, we could
 		// do this in log time.
@@ -279,7 +281,7 @@ func (s Sample) IQR() float64 {
 	if !s.Sorted {
 		s = *s.Copy().Sort()
 	}
-	return s.Percentile(0.75) - s.Percentile(0.25)
+	return s.Quantile(0.75) - s.Quantile(0.25)
 }
 
 type sampleSorter struct {
diff --git a/stats/sample_test.go b/stats/sample_test.go
@@ -6,9 +6,9 @@ package stats
 
 import "testing"
 
-func TestSamplePercentile(t *testing.T) {
+func TestSampleQuantile(t *testing.T) {
 	s := Sample{Xs: []float64{15, 20, 35, 40, 50}}
-	testFunc(t, "Percentile", s.Percentile, map[float64]float64{
+	testFunc(t, "Quantile", s.Quantile, map[float64]float64{
 		-1:  15,
 		0:   15,
 		.05: 15,

Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ func main() {`
`57`	`57`	`if !ok {`
`58`	`58`	`label = fmt.Sprintf("%d%%ile", p)`
`59`	`59`	`}`
`60`		`- fmt.Printf("%8s %.6g\n", label, s.Percentile(float64(p)/100))`
	`60`	`+ fmt.Printf("%8s %.6g\n", label, s.Quantile(float64(p)/100))`
`61`	`61`	`}`
`62`	`62`	`fmt.Println()`
`63`	`63`