1
1
import nose
2
+ import time
2
3
3
- import pandas
4
- from pandas . util . testing import assert_frame_equal
4
+ import numpy as np
5
+ import pandas as pd
5
6
import pandas .util .testing as tm
6
7
7
- from pandas_datareader .wb import search , download , get_countries
8
-
9
- try :
10
- from pandas .compat import u
11
- except ImportError : # pragma: no cover
12
- try :
13
- unicode # python 2
14
- def u (s ):
15
- return unicode (s , "unicode_escape" )
16
- except NameError :
17
- def u (s ):
18
- return s
8
+ from pandas_datareader .wb import (search , download , get_countries ,
9
+ get_indicators , WorldBankReader )
10
+ from pandas_datareader ._utils import PANDAS_0170 , PANDAS_0160 , PANDAS_0140
19
11
20
12
21
13
class TestWB (tm .TestCase ):
@@ -29,6 +21,12 @@ def test_wdi_search(self):
29
21
result = search ('gdp.*capita.*constant' )
30
22
self .assertTrue (result .name .str .contains ('GDP' ).any ())
31
23
24
+ # check cache returns the results within 0.5 sec
25
+ current_time = time .time ()
26
+ result = search ('gdp.*capita.*constant' )
27
+ self .assertTrue (result .name .str .contains ('GDP' ).any ())
28
+ self .assertTrue (time .time () - current_time < 0.5 )
29
+
32
30
def test_wdi_download (self ):
33
31
34
32
# Test a bad indicator with double (US), triple (USA),
@@ -43,18 +41,120 @@ def test_wdi_download(self):
43
41
cntry_codes = ['CA' , 'MX' , 'USA' , 'US' , 'US' , 'KSV' , 'BLA' ]
44
42
inds = ['NY.GDP.PCAP.CD' ,'BAD.INDICATOR' ]
45
43
46
- expected = {'NY.GDP.PCAP.CD' : {('Canada' , '2003' ): 28026.006013044702 , ('Mexico' , '2003' ): 6601.0420648056606 , ('Canada' , '2004' ): 31829.522562759001 , ('Kosovo' , '2003' ): 1969.56271307405 , ('Mexico' , '2004' ): 7042.0247834044303 , ('United States' , '2004' ): 41928.886136479705 , ('United States' , '2003' ): 39682.472247320402 , ('Kosovo' , '2004' ): 2135.3328465238301 }}
47
- expected = pandas .DataFrame (expected )
44
+ expected = {'NY.GDP.PCAP.CD' : {('Canada' , '2004' ): 31829.522562759001 , ('Canada' , '2003' ): 28026.006013044702 ,
45
+ ('Kosovo' , '2004' ): 2135.3328465238301 , ('Kosovo' , '2003' ): 1969.56271307405 ,
46
+ ('Mexico' , '2004' ): 7042.0247834044303 , ('Mexico' , '2003' ): 6601.0420648056606 ,
47
+ ('United States' , '2004' ): 41928.886136479705 , ('United States' , '2003' ): 39682.472247320402 }}
48
+ expected = pd .DataFrame (expected )
48
49
# Round, to ignore revisions to data.
49
- expected = pandas .np .round (expected ,decimals = - 3 )
50
- expected .sort (inplace = True )
50
+ expected = np .round (expected ,decimals = - 3 )
51
+ if PANDAS_0170 :
52
+ expected = expected .sort_index ()
53
+ else :
54
+ expected = expected .sort ()
55
+
51
56
result = download (country = cntry_codes , indicator = inds ,
52
57
start = 2003 , end = 2004 , errors = 'ignore' )
53
- result .sort (inplace = True )
58
+ if PANDAS_0170 :
59
+ result = result .sort_index ()
60
+ else :
61
+ result = result .sort ()
54
62
# Round, to ignore revisions to data.
55
- result = pandas .np .round (result ,decimals = - 3 )
56
- expected .index = result .index
57
- assert_frame_equal (result , pandas .DataFrame (expected ))
63
+ result = np .round (result , decimals = - 3 )
64
+
65
+
66
+ if PANDAS_0140 :
67
+ expected .index .names = ['country' , 'year' ]
68
+ else :
69
+ # prior versions don't allow setting multiple names on a MultiIndex
70
+ # Thus overwrite it with the result
71
+ expected .index = result .index
72
+ tm .assert_frame_equal (result , expected )
73
+
74
+ # pass start and end as string
75
+ result = download (country = cntry_codes , indicator = inds ,
76
+ start = '2003' , end = '2004' , errors = 'ignore' )
77
+ if PANDAS_0170 :
78
+ result = result .sort_index ()
79
+ else :
80
+ result = result .sort ()
81
+ # Round, to ignore revisions to data.
82
+ result = np .round (result , decimals = - 3 )
83
+ tm .assert_frame_equal (result , expected )
84
+
85
def test_wdi_download_str(self):
    """Download with the country and the indicator given as plain strings.

    Fetches 'NY.GDP.PCAP.CD' for 'JP' over 2000-2004, once through
    ``download`` and once through ``WorldBankReader(...).read()``, and
    compares each frame with hard-coded values.  Both sides are rounded
    to the nearest thousand so later revisions to the data are ignored.
    """
    def by_index(frame):
        # sort_index() is used when PANDAS_0170 is set, sort() otherwise
        return frame.sort_index() if PANDAS_0170 else frame.sort()

    indicator = 'NY.GDP.PCAP.CD'
    country = 'JP'

    gdp_per_capita = {
        ('Japan', '2004'): 36441.50449394,
        ('Japan', '2003'): 33690.93772972,
        ('Japan', '2002'): 31235.58818439,
        ('Japan', '2001'): 32716.41867489,
        ('Japan', '2000'): 37299.64412913,
    }
    # Round, to ignore revisions to data.
    expected = np.round(pd.DataFrame({indicator: gdp_per_capita}),
                        decimals=-3)
    expected = by_index(expected)

    fetched = download(country=country, indicator=indicator,
                       start=2000, end=2004, errors='ignore')
    fetched = np.round(by_index(fetched), decimals=-3)

    if not PANDAS_0140:
        # prior versions don't allow setting multiple names on a
        # MultiIndex, so overwrite the index with the one from the result
        expected.index = fetched.index
    else:
        expected.index.names = ['country', 'year']

    tm.assert_frame_equal(fetched, expected)

    # the reader class must give the same frame as the helper function
    reader = WorldBankReader(indicator, countries=country,
                             start=2000, end=2004, errors='ignore')
    from_reader = np.round(by_index(reader.read()), decimals=-3)
    tm.assert_frame_equal(from_reader, expected)
127
+
128
def test_wdi_download_error_handling(self):
    """Check how ``download`` reports a bad country code and a bad indicator.

    For each kind of bad input, ``errors='raise'`` must raise a
    ``ValueError`` whose message matches the given pattern.  With
    ``errors='warn'`` (only exercised when ``PANDAS_0160`` is set) the call
    must emit a warning and still return a ``DataFrame`` with two rows.
    """
    # --- an unknown country code next to a valid one ---
    cntry_codes = ['USA', 'XX']
    inds = 'NY.GDP.PCAP.CD'

    # raw string: the parentheses are escaped so they match literally
    with tm.assertRaisesRegexp(ValueError,
                               r"Invalid Country Code\(s\): XX"):
        download(country=cntry_codes, indicator=inds,
                 start=2003, end=2004, errors='raise')

    if PANDAS_0160:
        # assert_produces_warning doesn't exist in prior versions
        with self.assert_produces_warning():
            result = download(country=cntry_codes, indicator=inds,
                              start=2003, end=2004, errors='warn')
            self.assertIsInstance(result, pd.DataFrame)
            self.assertEqual(len(result), 2)

    # --- an unknown indicator next to a valid one ---
    cntry_codes = ['USA']
    inds = ['NY.GDP.PCAP.CD', 'BAD_INDICATOR']

    # the dot is escaped so it only matches a literal full stop
    with tm.assertRaisesRegexp(
            ValueError,
            r"The provided parameter value is not valid\. "
            r"Indicator: BAD_INDICATOR"):
        download(country=cntry_codes, indicator=inds,
                 start=2003, end=2004, errors='raise')

    if PANDAS_0160:
        with self.assert_produces_warning():
            result = download(country=cntry_codes, indicator=inds,
                              start=2003, end=2004, errors='warn')
            self.assertIsInstance(result, pd.DataFrame)
            self.assertEqual(len(result), 2)
157
+
58
158
59
159
def test_wdi_download_w_retired_indicator (self ):
60
160
@@ -101,11 +201,24 @@ def test_wdi_download_w_crash_inducing_countrycode(self):
101
201
raise nose .SkipTest ("Invalid results" )
102
202
103
203
def test_wdi_get_countries(self):
    """Check the country table from both the function and the reader.

    Each table must list 'Zimbabwe' in its ``name`` column, have more than
    100 rows, and have non-null mean latitude and longitude.
    """
    # fetch both tables up front, then run the same checks on each
    from_function = get_countries()
    from_reader = WorldBankReader().get_countries()

    for countries in (from_function, from_reader):
        names = list(countries['name'])
        self.assertIn('Zimbabwe', names)
        self.assertGreater(len(countries), 100)
        self.assertFalse(pd.isnull(countries.latitude.mean()))
        self.assertFalse(pd.isnull(countries.longitude.mean()))
212
+
213
def test_wdi_get_indicators(self):
    """Check the indicator table from both the function and the reader.

    Each table must have exactly the expected columns and more than
    10000 rows.
    """
    from_function = get_indicators()
    from_reader = WorldBankReader().get_indicators()

    # the expected columns are the same for both tables, so build them once
    wanted_columns = pd.Index(['id', 'name', 'source', 'sourceNote',
                               'sourceOrganization', 'topics'])

    for indicators in (from_function, from_reader):
        # assert_index_equal doesn't exist, so compare with Index.equals
        self.assertTrue(indicators.columns.equals(wanted_columns))
        self.assertTrue(len(indicators) > 10000)
109
222
110
223
111
224
if __name__ == '__main__' :
0 commit comments