Skip to content

Commit 522c61c

Browse files
committed
CLN: Cleanup WorldBank reader
1 parent ccc5aa9 commit 522c61c

File tree

5 files changed

+438
-182
lines changed

5 files changed

+438
-182
lines changed

pandas_datareader/_utils.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import warnings
2-
1+
import pandas as pd
32
from pandas.core.common import PandasError
43

54

@@ -10,3 +9,21 @@ class RemoteDataError(PandasError, IOError):
109
pass
1110

1211

12+
from distutils.version import LooseVersion
13+
14+
PANDAS_VERSION = LooseVersion(pd.__version__)
15+
16+
if PANDAS_VERSION >= LooseVersion('0.17.0'):
17+
PANDAS_0170 = True
18+
else:
19+
PANDAS_0170 = False
20+
21+
if PANDAS_VERSION >= LooseVersion('0.16.0'):
22+
PANDAS_0160 = True
23+
else:
24+
PANDAS_0160 = False
25+
26+
if PANDAS_VERSION >= LooseVersion('0.14.0'):
27+
PANDAS_0140 = True
28+
else:
29+
PANDAS_0140 = False

pandas_datareader/base.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pandas import to_datetime
99
import pandas.compat as compat
10-
from pandas.core.common import PandasError
10+
from pandas.core.common import PandasError, is_number
1111
from pandas import Panel, DataFrame
1212
from pandas import read_csv
1313
from pandas.compat import StringIO, bytes_to_str
@@ -37,6 +37,7 @@ class _BaseReader(object):
3737
"""
3838

3939
_chunk_size = 1024 * 1024
40+
_format = 'string'
4041

4142
def __init__(self, symbols, start=None, end=None,
4243
retry_count=3, pause=0.001, session=None):
@@ -73,7 +74,12 @@ def read(self):
7374

7475
def _read_one_data(self, url, params):
7576
""" read one data from specified URL """
76-
out = self._read_url_as_StringIO(self.url, params=params)
77+
if self._format == 'string':
78+
out = self._read_url_as_StringIO(url, params=params)
79+
elif self._format == 'json':
80+
out = self._get_response(url, params=params).json()
81+
else:
82+
raise NotImplementedError(self._format)
7783
return self._read_lines(out)
7884

7985
def _read_url_as_StringIO(self, url, params=None):
@@ -128,8 +134,15 @@ def _sanitize_dates(self, start, end):
128134
if start is None - default is 2010/01/01
129135
if end is None - default is today
130136
"""
137+
if is_number(start):
138+
# treat a number as a year
139+
start = dt.datetime(start, 1, 1)
131140
start = to_datetime(start)
141+
142+
if is_number(end):
143+
end = dt.datetime(end, 1, 1)
132144
end = to_datetime(end)
145+
133146
if start is None:
134147
start = dt.datetime(2010, 1, 1)
135148
if end is None:

pandas_datareader/oecd.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ class OECDReader(_BaseReader):
99

1010
"""Get data for the given name from OECD."""
1111

12+
_format = 'json'
13+
1214
@property
1315
def url(self):
1416
url = 'http://stats.oecd.org/SDMX-JSON/data'
@@ -19,10 +21,9 @@ def url(self):
1921
# API: https://data.oecd.org/api/sdmx-json-documentation/
2022
return '{0}/{1}/all/all?'.format(url, self.symbols)
2123

22-
def _read_one_data(self, url, params):
24+
def _read_lines(self, out):
2325
""" read one data from specified URL """
24-
resp = self._get_response(url)
25-
df = read_jsdmx(resp.json())
26+
df = read_jsdmx(out)
2627
try:
2728
idx_name = df.index.name # hack for pandas 0.16.2
2829
df.index = pd.to_datetime(df.index)

pandas_datareader/tests/test_wb.py

+140-27
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,13 @@
11
import nose
2+
import time
23

3-
import pandas
4-
from pandas.util.testing import assert_frame_equal
4+
import numpy as np
5+
import pandas as pd
56
import pandas.util.testing as tm
67

7-
from pandas_datareader.wb import search, download, get_countries
8-
9-
try:
10-
from pandas.compat import u
11-
except ImportError: # pragma: no cover
12-
try:
13-
unicode # python 2
14-
def u(s):
15-
return unicode(s, "unicode_escape")
16-
except NameError:
17-
def u(s):
18-
return s
8+
from pandas_datareader.wb import (search, download, get_countries,
9+
get_indicators, WorldBankReader)
10+
from pandas_datareader._utils import PANDAS_0170, PANDAS_0160, PANDAS_0140
1911

2012

2113
class TestWB(tm.TestCase):
@@ -29,6 +21,12 @@ def test_wdi_search(self):
2921
result = search('gdp.*capita.*constant')
3022
self.assertTrue(result.name.str.contains('GDP').any())
3123

24+
# check that the cache returns the results within 0.5 sec
25+
current_time = time.time()
26+
result = search('gdp.*capita.*constant')
27+
self.assertTrue(result.name.str.contains('GDP').any())
28+
self.assertTrue(time.time() - current_time < 0.5)
29+
3230
def test_wdi_download(self):
3331

3432
# Test a bad indicator with double (US), triple (USA),
@@ -43,18 +41,120 @@ def test_wdi_download(self):
4341
cntry_codes = ['CA', 'MX', 'USA', 'US', 'US', 'KSV', 'BLA']
4442
inds = ['NY.GDP.PCAP.CD','BAD.INDICATOR']
4543

46-
expected = {'NY.GDP.PCAP.CD': {('Canada', '2003'): 28026.006013044702, ('Mexico', '2003'): 6601.0420648056606, ('Canada', '2004'): 31829.522562759001, ('Kosovo', '2003'): 1969.56271307405, ('Mexico', '2004'): 7042.0247834044303, ('United States', '2004'): 41928.886136479705, ('United States', '2003'): 39682.472247320402, ('Kosovo', '2004'): 2135.3328465238301}}
47-
expected = pandas.DataFrame(expected)
44+
expected = {'NY.GDP.PCAP.CD': {('Canada', '2004'): 31829.522562759001, ('Canada', '2003'): 28026.006013044702,
45+
('Kosovo', '2004'): 2135.3328465238301, ('Kosovo', '2003'): 1969.56271307405,
46+
('Mexico', '2004'): 7042.0247834044303, ('Mexico', '2003'): 6601.0420648056606,
47+
('United States', '2004'): 41928.886136479705, ('United States', '2003'): 39682.472247320402}}
48+
expected = pd.DataFrame(expected)
4849
# Round, to ignore revisions to data.
49-
expected = pandas.np.round(expected,decimals=-3)
50-
expected.sort(inplace=True)
50+
expected = np.round(expected,decimals=-3)
51+
if PANDAS_0170:
52+
expected = expected.sort_index()
53+
else:
54+
expected = expected.sort()
55+
5156
result = download(country=cntry_codes, indicator=inds,
5257
start=2003, end=2004, errors='ignore')
53-
result.sort(inplace=True)
58+
if PANDAS_0170:
59+
result = result.sort_index()
60+
else:
61+
result = result.sort()
5462
# Round, to ignore revisions to data.
55-
result = pandas.np.round(result,decimals=-3)
56-
expected.index = result.index
57-
assert_frame_equal(result, pandas.DataFrame(expected))
63+
result = np.round(result, decimals=-3)
64+
65+
66+
if PANDAS_0140:
67+
expected.index.names=['country', 'year']
68+
else:
69+
# prior versions don't allow setting multiple names on a MultiIndex
70+
# Thus overwrite it with the result
71+
expected.index = result.index
72+
tm.assert_frame_equal(result, expected)
73+
74+
# pass start and end as string
75+
result = download(country=cntry_codes, indicator=inds,
76+
start='2003', end='2004', errors='ignore')
77+
if PANDAS_0170:
78+
result = result.sort_index()
79+
else:
80+
result = result.sort()
81+
# Round, to ignore revisions to data.
82+
result = np.round(result, decimals=-3)
83+
tm.assert_frame_equal(result, expected)
84+
85+
def test_wdi_download_str(self):
86+
87+
expected = {'NY.GDP.PCAP.CD': {('Japan', '2004'): 36441.50449394,
88+
('Japan', '2003'): 33690.93772972,
89+
('Japan', '2002'): 31235.58818439,
90+
('Japan', '2001'): 32716.41867489,
91+
('Japan', '2000'): 37299.64412913}}
92+
expected = pd.DataFrame(expected)
93+
# Round, to ignore revisions to data.
94+
expected = np.round(expected, decimals=-3)
95+
if PANDAS_0170:
96+
expected = expected.sort_index()
97+
else:
98+
expected = expected.sort()
99+
100+
cntry_codes = 'JP'
101+
inds = 'NY.GDP.PCAP.CD'
102+
result = download(country=cntry_codes, indicator=inds,
103+
start=2000, end=2004, errors='ignore')
104+
if PANDAS_0170:
105+
result = result.sort_index()
106+
else:
107+
result = result.sort()
108+
result = np.round(result, decimals=-3)
109+
110+
if PANDAS_0140:
111+
expected.index.names=['country', 'year']
112+
else:
113+
# prior versions don't allow setting multiple names on a MultiIndex
114+
# Thus overwrite it with the result
115+
expected.index = result.index
116+
117+
tm.assert_frame_equal(result, expected)
118+
119+
result = WorldBankReader(inds, countries=cntry_codes,
120+
start=2000, end=2004, errors='ignore').read()
121+
if PANDAS_0170:
122+
result = result.sort_index()
123+
else:
124+
result = result.sort()
125+
result = np.round(result, decimals=-3)
126+
tm.assert_frame_equal(result, expected)
127+
128+
def test_wdi_download_error_handling(self):
129+
cntry_codes = ['USA', 'XX']
130+
inds = 'NY.GDP.PCAP.CD'
131+
132+
with tm.assertRaisesRegexp(ValueError, "Invalid Country Code\\(s\\): XX"):
133+
result = download(country=cntry_codes, indicator=inds,
134+
start=2003, end=2004, errors='raise')
135+
136+
if PANDAS_0160:
137+
# assert_produces_warning doesn't exist in prior versions
138+
with self.assert_produces_warning():
139+
result = download(country=cntry_codes, indicator=inds,
140+
start=2003, end=2004, errors='warn')
141+
self.assertTrue(isinstance(result, pd.DataFrame))
142+
self.assertEqual(len(result), 2)
143+
144+
cntry_codes = ['USA']
145+
inds = ['NY.GDP.PCAP.CD', 'BAD_INDICATOR']
146+
147+
with tm.assertRaisesRegexp(ValueError, "The provided parameter value is not valid\\. Indicator: BAD_INDICATOR"):
148+
result = download(country=cntry_codes, indicator=inds,
149+
start=2003, end=2004, errors='raise')
150+
151+
if PANDAS_0160:
152+
with self.assert_produces_warning():
153+
result = download(country=cntry_codes, indicator=inds,
154+
start=2003, end=2004, errors='warn')
155+
self.assertTrue(isinstance(result, pd.DataFrame))
156+
self.assertEqual(len(result), 2)
157+
58158

59159
def test_wdi_download_w_retired_indicator(self):
60160

@@ -101,11 +201,24 @@ def test_wdi_download_w_crash_inducing_countrycode(self):
101201
raise nose.SkipTest("Invalid results")
102202

103203
def test_wdi_get_countries(self):
104-
result = get_countries()
105-
self.assertTrue('Zimbabwe' in list(result['name']))
106-
self.assertTrue(len(result) > 100)
107-
self.assertTrue(pandas.notnull(result.latitude.mean()))
108-
self.assertTrue(pandas.notnull(result.longitude.mean()))
204+
result1 = get_countries()
205+
result2 = WorldBankReader().get_countries()
206+
207+
for result in [result1, result2]:
208+
self.assertTrue('Zimbabwe' in list(result['name']))
209+
self.assertTrue(len(result) > 100)
210+
self.assertTrue(pd.notnull(result.latitude.mean()))
211+
self.assertTrue(pd.notnull(result.longitude.mean()))
212+
213+
def test_wdi_get_indicators(self):
214+
result1 = get_indicators()
215+
result2 = WorldBankReader().get_indicators()
216+
217+
for result in [result1, result2]:
218+
exp_col = pd.Index(['id', 'name', 'source', 'sourceNote', 'sourceOrganization', 'topics'])
219+
# assert_index_equal doesn't exist
220+
self.assertTrue(result.columns.equals(exp_col))
221+
self.assertTrue(len(result) > 10000)
109222

110223

111224
if __name__ == '__main__':

0 commit comments

Comments
 (0)