-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathEarthdataDownload.py
119 lines (60 loc) · 2.93 KB
/
EarthdataDownload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#---------------------------------------------------------------------------------
# This script is used to download data from earthdata.nasa.gov
# It was adapted from © Peter Smith and Catalino Cuadrado:
# https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
# change the user name, password and link list file
# only tested for GLDAS NOAH data
# https://disc.gsfc.nasa.gov/datasets?page=1&project=GLDAS
#
# Department of Earth Sciences, Uppsala University
# Shunan Feng: [email protected]
# 20190317
#---------------------------------------------------------------------------------
import requests # get the requests library from https://github.com/requests/requests
from time import sleep
# overriding requests.Session.rebuild_auth to maintain headers when redirected
class SessionWithHeaderRedirection(requests.Session):
    """requests.Session that preserves the Authorization header across
    redirects to or from the NASA Earthdata login host.

    By default requests strips credentials when a redirect crosses
    hostnames; Earthdata bounces every download through its auth host,
    so that behavior would break authentication.
    """
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        super().__init__()
        # HTTP basic auth credentials applied to every request
        self.auth = (username, password)

    def rebuild_auth(self, prepared_request, response):
        """Override of requests.Session.rebuild_auth: drop the
        Authorization header only when the redirect leaves the original
        host AND neither end of the redirect is the Earthdata auth host.
        Mutates prepared_request in place; returns None."""
        request_headers = prepared_request.headers
        if 'Authorization' not in request_headers:
            return
        source_host = requests.utils.urlparse(response.request.url).hostname
        target_host = requests.utils.urlparse(prepared_request.url).hostname
        # Same host, or either side is the trusted auth host: keep creds.
        if source_host == target_host:
            return
        if self.AUTH_HOST in (source_host, target_host):
            return
        del request_headers['Authorization']
# create session with the user credentials that will be used to authenticate
# access to the data
username = "username" # change here
password = "password" # change here
session = SessionWithHeaderRedirection(username, password)
# read the list of download URLs, one per line; execute line by line from
# the link list (I usually remove the first link of the pdf file).
# A with-block guarantees the file handle is closed even if reading fails
# (the original open/close pair leaked the handle on error).
with open("LINKLIST.txt", "r") as f:
    x = f.readlines()
#
for url in x:
    url = url.strip()
    if not url:
        continue  # skip blank lines in the link list (would otherwise crash)
    sleep(1)  # small delay between requests to be polite to the server
    # Derive the local filename from the URL. For GES DISC subsetter links
    # ending in "....nc4.SUB.nc4" the slice end of rfind('.nc4') - 4 drops
    # the trailing ".SUB.nc4" so the saved name ends in ".nc4" — kept
    # exactly as in the original (only tested for GLDAS NOAH data).
    start = url.rfind('GLDAS_NOAH025_M.A')
    end = url.rfind('.nc4')
    if start != -1 and end != -1:
        filename = url[start:end - 4]
    else:
        # Fallback for URLs without the GLDAS pattern: use the last path
        # component instead of producing an empty/garbage slice.
        filename = url.rsplit('/', 1)[-1]
    try:
        # submit the request using the session; stream=True avoids holding
        # the whole granule in memory, and the with-block closes the
        # connection even when an error occurs mid-download
        with session.get(url, stream=True) as response:
            print(response.status_code)
            # raise an exception in case of http errors (4xx / 5xx)
            response.raise_for_status()
            # save the file in 1 MiB chunks
            with open(filename, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    fd.write(chunk)
    except requests.exceptions.HTTPError as e:
        # report the failure and continue with the remaining links
        print(e)