1
+ import time
2
+ import warnings
3
+ import numpy as np
4
+ import datetime as dt
5
+
6
+ import requests
7
+
8
+ from pandas import to_datetime
9
+ import pandas .compat as compat
10
+ from pandas .core .common import PandasError
11
+ from pandas import Panel , DataFrame
12
+ from pandas import read_csv
13
+ from pandas .compat import StringIO , bytes_to_str
14
+ from pandas .util .testing import _network_error_classes
15
+
16
+ from pandas_datareader ._utils import RemoteDataError , SymbolWarning
17
+
18
+
19
+ class _BaseReader (object ):
20
+
21
+ """
22
+
23
+ Parameters
24
+ ----------
25
+ sym : string with a single Single stock symbol (ticker).
26
+ start : string, (defaults to '1/1/2010')
27
+ Starting date, timestamp. Parses many different kind of date
28
+ representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
29
+ end : string, (defaults to today)
30
+ Ending date, timestamp. Same format as starting date.
31
+ retry_count : int, default 3
32
+ Number of times to retry query request.
33
+ pause : int, default 0
34
+ Time, in seconds, of the pause between retries.
35
+ session : Session, default None
36
+ requests.sessions.Session instance to be used
37
+ """
38
+
39
+ _chunk_size = 1024 * 1024
40
+
41
+ def __init__ (self , symbols , start = None , end = None ,
42
+ retry_count = 3 , pause = 0.001 , session = None ):
43
+ self .symbols = symbols
44
+
45
+ start , end = self ._sanitize_dates (start , end )
46
+ self .start = start
47
+ self .end = end
48
+
49
+ if not isinstance (retry_count , int ) or retry_count < 0 :
50
+ raise ValueError ("'retry_count' must be integer larger than 0" )
51
+ self .retry_count = retry_count
52
+ self .pause = pause
53
+ self .session = self ._init_session (session , retry_count )
54
+
55
+ def _init_session (self , session , retry_count ):
56
+ if session is None :
57
+ session = requests .Session ()
58
+ # do not set requests max_retries here to support arbitrary pause
59
+ return session
60
+
61
+ @property
62
+ def url (self ):
63
+ # must be overridden in subclass
64
+ raise NotImplementedError
65
+
66
+ @property
67
+ def params (self ):
68
+ return None
69
+
70
+ def read (self ):
71
+ """ read data """
72
+ return self ._read_one_data (self .url , self .params )
73
+
74
+ def _read_one_data (self , url , params ):
75
+ """ read one data from specified URL """
76
+ out = self ._read_url_as_StringIO (self .url , params = params )
77
+ return self ._read_lines (out )
78
+
79
+ def _read_url_as_StringIO (self , url , params = None ):
80
+ """
81
+ Open url (and retry)
82
+ """
83
+ response = self ._get_response (url , params = params )
84
+ out = StringIO ()
85
+ if isinstance (response .content , compat .binary_type ):
86
+ out .write (bytes_to_str (response .content ))
87
+ else :
88
+ out .write (response .content )
89
+ out .seek (0 )
90
+ return out
91
+
92
+ def _get_response (self , url , params = None ):
93
+ """ send raw HTTP request to get requests.Response from the specified url
94
+ Parameters
95
+ ----------
96
+ url : str
97
+ target URL
98
+ params : dict or None
99
+ parameters passed to the URL
100
+ """
101
+
102
+ # initial attempt + retry
103
+ for i in range (self .retry_count + 1 ):
104
+ response = self .session .get (url , params = params )
105
+ if response .status_code == requests .codes .ok :
106
+ return response
107
+ time .sleep (self .pause )
108
+
109
+ raise RemoteDataError ('Unable to read URL: {0}' .format (url ))
110
+
111
+ def _read_lines (self , out ):
112
+ rs = read_csv (out , index_col = 0 , parse_dates = True , na_values = '-' )[::- 1 ]
113
+ # Yahoo! Finance sometimes does this awesome thing where they
114
+ # return 2 rows for the most recent business day
115
+ if len (rs ) > 2 and rs .index [- 1 ] == rs .index [- 2 ]: # pragma: no cover
116
+ rs = rs [:- 1 ]
117
+ #Get rid of unicode characters in index name.
118
+ try :
119
+ rs .index .name = rs .index .name .decode ('unicode_escape' ).encode ('ascii' , 'ignore' )
120
+ except AttributeError :
121
+ #Python 3 string has no decode method.
122
+ rs .index .name = rs .index .name .encode ('ascii' , 'ignore' ).decode ()
123
+ return rs
124
+
125
+ def _sanitize_dates (self , start , end ):
126
+ """
127
+ Return (datetime_start, datetime_end) tuple
128
+ if start is None - default is 2010/01/01
129
+ if end is None - default is today
130
+ """
131
+ start = to_datetime (start )
132
+ end = to_datetime (end )
133
+ if start is None :
134
+ start = dt .datetime (2010 , 1 , 1 )
135
+ if end is None :
136
+ end = dt .datetime .today ()
137
+ return start , end
138
+
139
+
140
+ class _DailyBaseReader (_BaseReader ):
141
+ """ Base class for Google / Yahoo daily reader """
142
+
143
+ def __init__ (self , symbols = None , start = None , end = None , retry_count = 3 ,
144
+ pause = 0.001 , session = None , chunksize = 25 ):
145
+ super (_DailyBaseReader , self ).__init__ (symbols = symbols ,
146
+ start = start , end = end ,
147
+ retry_count = retry_count ,
148
+ pause = pause , session = session )
149
+ self .chunksize = chunksize
150
+
151
+ def _get_params (self , * args , ** kwargs ):
152
+ raise NotImplementedError
153
+
154
+ def read (self ):
155
+ """ read data """
156
+ # If a single symbol, (e.g., 'GOOG')
157
+ if isinstance (self .symbols , (compat .string_types , int )):
158
+ df = self ._read_one_data (self .url , params = self ._get_params (self .symbols ))
159
+ # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
160
+ elif isinstance (self .symbols , DataFrame ):
161
+ df = self ._dl_mult_symbols (self .symbols .index )
162
+ else :
163
+ df = self ._dl_mult_symbols (self .symbols )
164
+ return df
165
+
166
+ def _dl_mult_symbols (self , symbols ):
167
+ stocks = {}
168
+ failed = []
169
+ passed = []
170
+ for sym_group in _in_chunks (symbols , self .chunksize ):
171
+ for sym in sym_group :
172
+ try :
173
+ stocks [sym ] = self ._read_one_data (self .url , self ._get_params (sym ))
174
+ passed .append (sym )
175
+ except IOError :
176
+ msg = 'Failed to read symbol: {0!r}, replacing with NaN.'
177
+ warnings .warn (msg .format (sym ), SymbolWarning )
178
+ failed .append (sym )
179
+
180
+ if len (passed ) == 0 :
181
+ msg = "No data fetched using {0!r}"
182
+ raise RemoteDataError (msg .format (self .__class__ .__name__ ))
183
+ try :
184
+ if len (stocks ) > 0 and len (failed ) > 0 and len (passed ) > 0 :
185
+ df_na = stocks [passed [0 ]].copy ()
186
+ df_na [:] = np .nan
187
+ for sym in failed :
188
+ stocks [sym ] = df_na
189
+ return Panel (stocks ).swapaxes ('items' , 'minor' )
190
+ except AttributeError :
191
+ # cannot construct a panel with just 1D nans indicating no data
192
+ msg = "No data fetched using {0!r}"
193
+ raise RemoteDataError (msg .format (self .__class__ .__name__ ))
194
+
195
+
196
+ def _in_chunks (seq , size ):
197
+ """
198
+ Return sequence in 'chunks' of size defined by size
199
+ """
200
+ return (seq [pos :pos + size ] for pos in range (0 , len (seq ), size ))
0 commit comments