Skip to content

Commit 612d3f9

Browse files
Merge pull request #3305 from plotly/extended_data
add some extra options to various demo datasets
2 parents 09bbd75 + ee03bcc commit 612d3f9

File tree

3 files changed

+83
-76
lines changed

3 files changed

+83
-76
lines changed

Diff for: CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
44

55
## UNRELEASED
66

7+
8+
### Added
9+
- Extra flags were added to the `gapminder` and `stocks` datasets to facilitate testing, documentation and demos [#3305](https://github.com/plotly/plotly.py/issues/3305)
10+
711
### Fixed
812
- Fixed regression introduced in version 5.0.0 where pandas/numpy arrays with `dtype` of Object were being converted to `list` values when added to a Figure ([#3292](https://github.com/plotly/plotly.py/issues/3292), [#3293](https://github.com/plotly/plotly.py/pull/3293))
913

Diff for: packages/python/plotly/plotly/data/__init__.py

+79-76
Original file line numberDiff line numberDiff line change
@@ -3,78 +3,83 @@
33
"""
44

55

6-
def gapminder(datetimes=False, centroids=False, year=None):
    """
    Each row represents a country on a given year.

    https://www.gapminder.org/data/

    Args:
        datetimes: If True, the 'year' column is converted to a datetime column
            (January 1st of each year).
        centroids: If True, the 'centroid_lat' and 'centroid_lon' columns are
            kept in the result; otherwise they are dropped.
        year: If not None, only the rows for that year are returned.

    Returns:
        A `pandas.DataFrame` with 1704 rows (fewer when `year` is given) and the
        following columns:
        `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
        'iso_alpha', 'iso_num']`.
    """
    df = _get_dataset("gapminder")
    # Filter before any datetime conversion: once 'year' holds datetimes, an
    # equality test against the integer `year` argument no longer matches.
    if year is not None:
        df = df[df["year"] == year]
    if not centroids:
        # Non-inplace drop so the frame handed back by _get_dataset is not mutated.
        df = df.drop(["centroid_lat", "centroid_lon"], axis=1)
    if datetimes:
        # assign() returns a new frame, so this is safe on a filtered selection.
        df = df.assign(
            year=(df["year"].astype(str) + "-01-01").astype("datetime64[ns]")
        )
    return df
1828

1929

2030
def tips():
    """
    Load the restaurant tipping dataset; each row represents a restaurant bill.

    Source: https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html

    Returns:
        A `pandas.DataFrame` with 244 rows and the following columns:
        `['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
    """
    dataset_name = "tips"
    return _get_dataset(dataset_name)
3140

3241

3342
def iris():
    """
    Load the Iris flower dataset; each row represents a flower.

    Source: https://en.wikipedia.org/wiki/Iris_flower_data_set

    Returns:
        A `pandas.DataFrame` with 150 rows and the following columns:
        `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.
    """
    dataset_name = "iris"
    return _get_dataset(dataset_name)
4452

4553

4654
def wind():
    """
    Load the wind dataset; each row represents a level of wind intensity in a
    cardinal direction, and its frequency.

    Returns:
        A `pandas.DataFrame` with 128 rows and the following columns:
        `['direction', 'strength', 'frequency']`.
    """
    dataset_name = "wind"
    return _get_dataset(dataset_name)
5562

5663

5764
def election():
    """
    Load the election dataset; each row represents voting results for an
    electoral district in the 2013 Montreal mayoral election.

    Returns:
        A `pandas.DataFrame` with 58 rows and the following columns:
        `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'district_id']`.
    """
    dataset_name = "election"
    return _get_dataset(dataset_name)
6773

6874

6975
def election_geojson():
7076
"""
71-
Each feature represents an electoral district in the 2013 Montreal mayoral election.
77+
Each feature represents an electoral district in the 2013 Montreal mayoral election.
7278
73-
Returns:
74-
A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features whose `id`
75-
is an electoral district numerical ID and whose `district` property is the ID and
76-
district name.
77-
"""
79+
Returns:
80+
A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features whose `id`
81+
is an electoral district numerical ID and whose `district` property is the ID and
82+
district name."""
7883
import gzip
7984
import json
8085
import os
@@ -92,27 +97,28 @@ def election_geojson():
9297

9398
def carshare():
    """
    Load the car-sharing dataset; each row represents the availability of
    car-sharing services near the centroid of a zone in Montreal over a
    month-long period.

    Returns:
        A `pandas.DataFrame` with 249 rows and the following columns:
        `['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
    """
    dataset_name = "carshare"
    return _get_dataset(dataset_name)
103107

104108

105-
def stocks(indexed=False):
109+
def stocks(indexed=False, datetimes=False):
106110
"""
107-
Each row in this wide dataset represents closing prices from 6 tech stocks in 2018/2019.
108-
109-
Returns:
110-
A `pandas.DataFrame` with 100 rows and the following columns:
111-
`['date', 'GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT']`.
112-
If `indexed` is True, the 'date' column is used as the index and the column index
113-
is named 'company'
114-
"""
111+
Each row in this wide dataset represents closing prices from 6 tech stocks in 2018/2019.
112+
113+
Returns:
114+
A `pandas.DataFrame` with 100 rows and the following columns:
115+
`['date', 'GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT']`.
116+
If `indexed` is True, the 'date' column is used as the index and the column index
117+
is named 'company'
118+
If `datetimes` is True, the 'date' column will be a datetime column"""
115119
df = _get_dataset("stocks")
120+
if datetimes:
121+
df["date"] = df["date"].astype("datetime64[ns]")
116122
if indexed:
117123
df = df.set_index("date")
118124
df.columns.name = "company"
@@ -121,15 +127,14 @@ def stocks(indexed=False):
121127

122128
def experiment(indexed=False):
123129
"""
124-
Each row in this wide dataset represents the results of 100 simulated participants
125-
on three hypothetical experiments, along with their gender and control/treatment group.
130+
Each row in this wide dataset represents the results of 100 simulated participants
131+
on three hypothetical experiments, along with their gender and control/treatment group.
126132
127133
128-
Returns:
129-
A `pandas.DataFrame` with 100 rows and the following columns:
130-
`['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
131-
If `indexed` is True, the data frame index is named "participant"
132-
"""
134+
Returns:
135+
A `pandas.DataFrame` with 100 rows and the following columns:
136+
`['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
137+
If `indexed` is True, the data frame index is named "participant" """
133138
df = _get_dataset("experiment")
134139
if indexed:
135140
df.index.name = "participant"
@@ -138,15 +143,14 @@ def experiment(indexed=False):
138143

139144
def medals_wide(indexed=False):
140145
"""
141-
This dataset represents the medal table for Olympic Short Track Speed Skating for the
142-
top three nations as of 2020.
143-
144-
Returns:
145-
A `pandas.DataFrame` with 3 rows and the following columns:
146-
`['nation', 'gold', 'silver', 'bronze']`.
147-
If `indexed` is True, the 'nation' column is used as the index and the column index
148-
is named 'medal'
149-
"""
146+
This dataset represents the medal table for Olympic Short Track Speed Skating for the
147+
top three nations as of 2020.
148+
149+
Returns:
150+
A `pandas.DataFrame` with 3 rows and the following columns:
151+
`['nation', 'gold', 'silver', 'bronze']`.
152+
If `indexed` is True, the 'nation' column is used as the index and the column index
153+
is named 'medal'"""
150154
df = _get_dataset("medals")
151155
if indexed:
152156
df = df.set_index("nation")
@@ -156,14 +160,13 @@ def medals_wide(indexed=False):
156160

157161
def medals_long(indexed=False):
158162
"""
159-
This dataset represents the medal table for Olympic Short Track Speed Skating for the
160-
top three nations as of 2020.
163+
This dataset represents the medal table for Olympic Short Track Speed Skating for the
164+
top three nations as of 2020.
161165
162-
Returns:
163-
A `pandas.DataFrame` with 9 rows and the following columns:
164-
`['nation', 'medal', 'count']`.
165-
If `indexed` is True, the 'nation' column is used as the index.
166-
"""
166+
Returns:
167+
A `pandas.DataFrame` with 9 rows and the following columns:
168+
`['nation', 'medal', 'count']`.
169+
If `indexed` is True, the 'nation' column is used as the index."""
167170
df = _get_dataset("medals").melt(
168171
id_vars=["nation"], value_name="count", var_name="medal"
169172
)
Binary file not shown.

0 commit comments

Comments
 (0)