Coverage for src/mars_mcd_helper/get_mars_data.py : 100.00%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
This module handles getting data from the MCD by scraping the CGI interface.

We simply pass parameters in the URL, like the web interface does.
Then we scrape the resulting web page for the link to the data and (optionally)
the image[s].

Note that this is a simple scraper and is not in any sense affiliated with the
MCD project. Please do not run it against the server too often or
unreasonably. Where possible use the saved output (this is why we provide a
saved output).
13"""
14import random
15from collections import namedtuple
16from logging import getLogger
17from pathlib import Path
18import time
19from typing import Union
21import requests
22from bs4 import BeautifulSoup
23from requests.exceptions import ConnectionError
# Module-level logger, named after this module so applications can configure it.
logger = getLogger(__name__)
# Default query parameters sent to the MCD CGI script. Key order is kept as-is
# because it determines the order of the query string built from this dict.
base_params = {
    "datekeyhtml": 1,
    "ls": 85.3,
    "localtime": 0.0,  # noqa
    "year": None,
    "month": None,
    "day": None,
    "hours": None,
    "minutes": None,
    "seconds": None,
    "julian": None,
    "martianyear": None,
    "martianmonth": None,
    "sol": None,
    "latitude": "all",
    "longitude": "all",
    "altitude": 10.0,
    "zkey": 3,
    "isfixedlt": "off",
    "dust": 1,
    "hrkey": 1,
    "zonmean": "off",
    "var1": "mtot",
    "var2": "t",
    "var3": "p",
    "var4": "none",
    "dpi": 80,
    "islog": "off",
    "colorm": "Blues",
    "minval": None,
    "maxval": None,
    "proj": "cyl",
    "plat": None,
    "plon": None,
    "trans": None,
    "iswind": "off",
    "latpoint": None,
    "lonpoint": None,
}
"""Parameters accepted by the server, with defaults taken from the web
interface. A parameter whose value is `None` is not sent at all; to send the
literal value `"none"`, use the string. Do not modify this dict in place;
instead pass the parameter and its value as keyword arguments to
`fetch_data()`."""
# Root of the MCD web interface; relative links scraped from result pages are
# resolved against it.
urlbase = "http://www-mars.lmd.jussieu.fr/mcd_python/"
# CGI endpoint that receives the query parameters.
url = f"{urlbase}cgi-bin/mcdcgi.py"
# Return type of `fetch_data()`: paths of the saved data file and image file
# (either may be None when that download was not requested).
_FetchedFiles = namedtuple("_FetchedFiles", ["dataf", "imgf"])
def generate_fn(**params) -> str:
    """
    Build the output filename that corresponds to a set of parameters.

    `fetch_data()` calls this with the keyword overrides it was given. It is
    exposed so that callers can work out the filename for a given set of
    params themselves.

    Args:
        **params: parameters to encode; entries whose value is `None` are
            left out.

    Returns:
        (str): `marsdata_<key>_<value>-<key>_<value>...txt`, in the order the
            keyword arguments were passed.
    """
    pieces = []
    for key, value in params.items():
        if value is None:
            continue
        pieces.append(f"{key}_{value}")
    stem = "-".join(pieces)
    return f"marsdata_{stem}.txt"
class FetchingError(Exception):
    """
    Raised when the server could not provide the requested resource.

    The server answers `200` even on failure and puts an html error message
    in the page, so the failure is reported by raising this exception with
    that message.
    """
def get(*args, max_wait: int = 30, **kwargs) -> requests.Response:
    """
    Get with exponential backoff.

    Calls `requests.get(*args, **kwargs)` and, on a connection error, sleeps
    and retries. The pause grows as `2 ** attempt` seconds plus up to one
    second of random jitter, and is capped so the total time spent stays
    within `max_wait`.

    Args:
        *args: Args for requests.get
        max_wait (int): Max seconds to wait. (Default value = 30)
        **kwargs: Kwargs for requests.get

    Returns:
        (requests.Response): response

    Raises:
        ConnectionError: if unable to connect within `max_wait` seconds. This
            is `requests.exceptions.ConnectionError`, as imported at the top
            of the module.
    """
    # time.monotonic() counts in seconds, so max_wait is used unscaled.
    deadline = time.monotonic() + max_wait
    attempt = 0
    while time.monotonic() < deadline:
        try:
            return requests.get(*args, **kwargs)
        except ConnectionError:
            backoff = (2 ** attempt) + (random.randint(0, 1000) / 1000)
            # Grow the delay for the next failure; without this the backoff
            # would stay constant at roughly one second.
            attempt += 1
            # Never sleep past the deadline (and never pass a negative value
            # to time.sleep if the failed request itself overran it).
            wait = min(backoff, max(deadline - time.monotonic(), 0.0))
            logger.warning(f"Failed to connect, pausing {wait} s and retrying")
            time.sleep(wait)
    logger.error("Failed to fetch.")
    raise ConnectionError("Max retries exceeded.")
def fetch_data(outdir: Union[Path, str] = ".", get_data: bool = True, get_img: bool = False, **params):
    """
    Fetch data from the MCD and save in outdir.

    Call this function to retrieve data from the server and save it in a file.
    Keyword arguments (other than `outdir`, `get_data` and `get_img`) will
    override the defaults in `base_params`, e.g.:

    ```python
    >> fetch_data(ls=0.5, localtime=1).dataf.name
    "marsdata_ls_0.5-localtime_1.txt"
    ```

    Only the overriding keyword arguments are used to build the filename
    (see `generate_fn()`), not the full set of defaults.

    For more information on any particular parameter see the web interface.

    Args:
        outdir (Union[Path, str]): dir to save in (Default value = "."). A
            `str` is expanded (`~`) and resolved to an absolute path; a `Path`
            is used as given.
        get_data (bool): get data or not (Default value = True)
        get_img (bool): get img or not (Default value = False)
        **params: Parameters to override.

    Raises:
        FetchingError: Failed to fetch requested data: either the server
            reported an error, or the page did not contain the expected link.

    Returns:
        (_FetchedFiles): namedtuple `(dataf, imgf)` holding the path of the
            saved data file and of the saved image; each is `None` when that
            download was not requested.
    """
    query = base_params.copy()
    query.update(params)
    logger.info("Fetching page")
    r = get(url, params=query)
    # The server answers 200 even on failure, so detect its error page by text.
    if "Ooops!" in r.text:
        raise FetchingError(f"Failed to download, server said {r.text}")
    # Kept at debug level: the full page is useful for diagnosis but should
    # not be written to stdout on every call.
    logger.debug("Server response %s: %s", r, r.text)
    soup = BeautifulSoup(r.text, features="html.parser")
    if isinstance(outdir, str):
        outdir = Path(outdir).expanduser().resolve()

    # Both output files share one stem derived from the overriding params.
    fname = generate_fn(**params)
    body = soup.body
    dataf, imgf = None, None

    if get_data:
        link = body.a if body is not None else None
        if link is None or not link.get("href"):
            raise FetchingError(f"No data link found in server response: {r.text}")
        # Links in the page are relative ("../..."); rebuild them on urlbase.
        data_url = urlbase + link["href"].replace("../", "")
        logger.info(f"Fetching ascii data from {data_url}")
        data_response = get(data_url)
        dataf = outdir / fname
        with dataf.open("w") as f:
            f.write(data_response.text)

    if get_img:
        img = body.img if body is not None else None
        if img is None or not img.get("src"):
            raise FetchingError(f"No image found in server response: {r.text}")
        img_url = urlbase + img["src"].replace("../", "")
        logger.info(f"Fetching img from {img_url}")
        img_response = get(img_url)
        imgf = (outdir / fname).with_suffix(".png")
        with imgf.open("wb") as im:
            im.write(img_response.content)

    return _FetchedFiles(dataf, imgf)