How To Access Data With Python
The following Python code example demonstrates how to configure a connection to download data from an Earthdata Login-enabled server. Note that you will need a secure way to configure the Earthdata Login username and password.
#!/usr/bin/env python3
# Tested on Python 3
#
# Downloads a single file from an Earthdata Login protected server using only
# the standard library (urllib + http.cookiejar) and prints the raw body.
from http.cookiejar import CookieJar
from urllib.parse import urlencode
import urllib.request

# The user credentials that will be used to authenticate access to the data
username = "<Your Earthdata login username>"
password = "<Your Earthdata login password>"

# The url of the file we wish to retrieve
url = "https://daacdata.apps.nsidc.org/pub/DATASETS/nsidc0192_seaice_trends_climo_v3/total-ice-area-extent/nasateam/gsfc.nasateam.month.anomaly.area.1978-2021.s"

# Create a password manager to deal with the 401 response that is returned from
# Earthdata Login
password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password)

# Create a cookie jar for storing cookies. This is used to store and return
# the session cookie given to us by the data server (otherwise it will just
# keep sending us back to Earthdata Login to authenticate). Ideally, we
# should use a file based cookie jar to preserve cookies between runs. This
# will make it much more efficient.
cookie_jar = CookieJar()

# Install all the handlers.
opener = urllib.request.build_opener(
    urllib.request.HTTPBasicAuthHandler(password_manager),
    #urllib.request.HTTPHandler(debuglevel=1),    # Uncomment these two lines to see
    #urllib.request.HTTPSHandler(debuglevel=1),   # details of the requests/responses
    urllib.request.HTTPCookieProcessor(cookie_jar))
urllib.request.install_opener(opener)

# Create and submit the request. There are a wide range of exceptions that
# can be thrown here, including HTTPError and URLError. These should be
# caught and handled.
request = urllib.request.Request(url)

# The with-block closes the response (and its socket) once the body is read.
with urllib.request.urlopen(request) as response:
    body = response.read()

# Print out the result (not a good idea with binary data!)
print(body)
Here is another example, which uses the `requests` library and will only work with later versions of Python:
#!/usr/bin/python
import requests # get the requests library from https://github.com/requests/requests
# Override requests.Session.rebuild_auth to maintain headers when redirected.
class SessionWithHeaderRedirection(requests.Session):
    """Session that keeps the Authorization header across auth-host redirects.

    The credentials passed to the constructor are stored as the session's
    ``auth`` tuple. The ``rebuild_auth`` override only drops the
    ``Authorization`` header when a redirect crosses hosts and neither end
    of the redirect is ``AUTH_HOST``.
    """

    # Host name of the NASA Earthdata Login (URS) service.
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        """Create the session and store ``(username, password)`` as its auth."""
        super().__init__()
        self.auth = (username, password)

    def rebuild_auth(self, prepared_request, response):
        """Decide whether the redirected request keeps its Authorization header.

        ``prepared_request`` is the request about to be sent to the redirect
        target; ``response`` is the redirect response whose ``request.url``
        gives the host being redirected from. The header is removed in place
        from ``prepared_request.headers``; nothing is returned.
        """
        headers = prepared_request.headers

        # Nothing to strip when no Authorization header is present.
        if 'Authorization' not in headers:
            return

        source_host = requests.utils.urlparse(response.request.url).hostname
        target_host = requests.utils.urlparse(prepared_request.url).hostname

        crosses_hosts = source_host != target_host
        touches_auth_host = self.AUTH_HOST in (source_host, target_host)

        # Only a cross-host redirect that does not involve the auth host
        # loses the header.
        if crosses_hosts and not touches_auth_host:
            del headers['Authorization']
# Create a session with the user credentials that will be used to
# authenticate access to the data.
username = "USERNAME"
password = "PASSWORD"
session = SessionWithHeaderRedirection(username, password)

# The URL of the file we wish to retrieve.
url = "https://n5eil01u.ecs.nsidc.org/MOST/MOD10A1.006/2016.12.31/MOD10A1.A2016366.h14v03.006.2017002110336.hdf.xml"

# Extract the filename from the URL to be used when saving the file
# (everything after the last '/').
filename = url[url.rfind('/')+1:]

try:
    # Submit the request using the session. With stream=True the body is
    # fetched lazily, so the response is used as a context manager to make
    # sure it is closed even when raise_for_status() raises before the body
    # has been read.
    with session.get(url, stream=True) as response:
        print(response.status_code)

        # Raise an exception in case of HTTP errors.
        response.raise_for_status()

        # Save the file, writing it in 1 MiB chunks.
        with open(filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=1024*1024):
                fd.write(chunk)

except requests.exceptions.HTTPError as e:
    # Handle any errors here.
    print(e)
Here is another simple example:
# Assumes the variables `username` and `password` are already set; `url` is
# assigned below.
import requests

# Example URL
url = "https://n5eil01u.ecs.nsidc.org/MOST/MOD10A1.006/2016.12.31/"
# url = "https://e4ftl01.cr.usgs.gov/MOTA/MCD43A2.006/2017.09.04/"

with requests.Session() as session:
    session.auth = (username, password)

    # Make an initial request, then request the URL that first response
    # reports (r1.url) again, this time passing the credentials explicitly.
    r1 = session.get(url)
    r = session.get(r1.url, auth=(username, password))

    # Only print the body when the request succeeded.
    if r.ok:
        print(r.content)
Accessing Data from NSIDC:
NSIDC has provided sample scripts to access their data with Python: