Skip to content

Commit 0047852

Browse files
authored
Merge pull request #308 from data-8/multinomial
add sample_proportions
2 parents 5eaeb4e + 211a21b commit 0047852

File tree

4 files changed

+42
-8
lines changed

4 files changed

+42
-8
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@ pip install datascience
2727

2828
This project adheres to [Semantic Versioning](http://semver.org/).
2929

30+
### v0.10.4
31+
32+
- Add `sample_proportions` function.
33+
3034
### v0.10.3
3135

32-
- Fix `OrderedDict` bug in `Table.hist`
36+
- Fix `OrderedDict` bug in `Table.hist`.
3337

3438
### v0.10.2
3539

datascience/util.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
"""Utility functions"""
22

33
__all__ = ['make_array', 'percentile', 'plot_cdf_area', 'plot_normal_cdf',
4-
'table_apply', 'proportions_from_distribution', 'minimize']
4+
'table_apply', 'proportions_from_distribution',
5+
'sample_proportions', 'minimize']
56

67
import numpy as np
78
import pandas as pd
@@ -105,6 +106,23 @@ def plot_normal_cdf(rbound=None, lbound=None, mean=0, sd=1):
105106
plot_cdf_area = plot_normal_cdf
106107

107108

109+
def sample_proportions(sample_size, probabilities):
    """Return the proportion of random draws for each outcome in a distribution.

    This function is similar to np.random.multinomial, but returns proportions
    instead of counts.

    Args:
        ``sample_size``: The size of the sample to draw from the distribution.
            It is used as the divisor when converting counts to proportions,
            so it should be a positive integer.

        ``probabilities``: An array of probabilities that forms a distribution.

    Returns:
        An array with the same length as ``probabilities`` that sums to 1.
    """
    # Draw the per-outcome counts, then scale them by the number of draws.
    return np.random.multinomial(sample_size, probabilities) / sample_size
124+
125+
108126
def proportions_from_distribution(table, label, sample_size,
109127
column_name='Random Sample'):
110128
"""
@@ -115,8 +133,6 @@ def proportions_from_distribution(table, label, sample_size,
115133
from the distribution in ``table.column(label)``, then divides by
116134
``sample_size`` to create the resulting column of proportions.
117135
118-
Returns a new ``Table`` and does not modify ``table``.
119-
120136
Args:
121137
``table``: An instance of ``Table``.
122138
@@ -136,8 +152,7 @@ def proportions_from_distribution(table, label, sample_size,
136152
``ValueError``: If the ``label`` is not in the table, or if
137153
``table.column(label)`` does not sum to 1.
138154
"""
139-
proportions = (np.random.multinomial(sample_size, table.column(label)) /
140-
sample_size)
155+
proportions = sample_proportions(sample_size, table.column(label))
141156
return table.with_column('Random Sample', proportions)
142157

143158

@@ -225,4 +240,3 @@ def objective(args):
225240
return result.x.item(0)
226241
else:
227242
return result.x
228-

datascience/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.10.3'
1+
__version__ = '0.10.4'

tests/test_util.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,19 @@ def test_minimize_smooth():
6666
def test_minimize_array():
    """Check ``ds.minimize`` with ``array=True`` on one- and two-variable objectives."""
    # One variable: the squared distance from 2 is minimized at x[0] == 2.
    single = ds.minimize(lambda x: (x[0]-2)**2, [0], array=True)
    assert _round_eq(2, single)

    # Two variables: the minimum of the summed squares is at (2, 1).
    double = ds.minimize(lambda x: (x[0]-2)**2 + (x[1]-1)**2, [0, 0], array=True)
    assert _round_eq([2, 1], list(double))
69+
70+
71+
def test_sample_proportions():
    """Check the length, the total, and the possible values of sampled proportions."""
    uniform = ds.sample_proportions(1000, np.ones(50)/50)
    assert len(uniform) == 50 and _round_eq(1, sum(uniform))
    # With two draws, every proportion must be 0/2, 1/2 or 2/2.
    # The membership checks are wrapped in all(): asserting the bare list
    # would always pass, because a non-empty list is truthy.
    assert all(x in (0, 0.5, 1)
               for x in ds.sample_proportions(2, ds.make_array(.2, .3, .5)))
75+
76+
77+
def test_proportions_from_distribution():
    """Check that a proportions column is added and the input table is unchanged."""
    t = ds.Table().with_column('probs', np.ones(50)/50)
    u = ds.proportions_from_distribution(t, 'probs', 1000)
    # The input table keeps its single column; the result gains a second one.
    assert t.num_columns == 1 and t.num_rows == 50
    assert u.num_columns == 2 and u.num_rows == 50
    uniform = u.column(1)
    assert len(uniform) == 50 and _round_eq(1, sum(uniform))
    # The membership checks are wrapped in all(): asserting the bare list
    # would always pass, because a non-empty list is truthy.
    assert all(x in (0, 0.5, 1)
               for x in ds.sample_proportions(2, ds.make_array(.2, .3, .5)))

0 commit comments

Comments
 (0)