Commit 359f7162 authored by Derek Homeier's avatar Derek Homeier
Browse files

Basic `fetch` functionality and download tests

parent 4d3675cb
Pipeline #2710 passed with stage
in 11 minutes and 45 seconds
...@@ -7,7 +7,8 @@ import astropy.units as u ...@@ -7,7 +7,8 @@ import astropy.units as u
from astropy.time import Time from astropy.time import Time
import sunpy.net.attrs as a import sunpy.net.attrs as a
from sunpy.net.attr import AttrAnd, AttrOr, AttrMeta, AttrWalker, DataAttr, and_, or_ from sunpy.net.attr import AttrAnd, AttrOr, AttrMeta, AttrWalker, DataAttr, and_, or_
from sunpy.net.base_client import BaseClient, QueryResponseTable from sunpy.net.base_client import (BaseClient, QueryResponseTable, QueryResponseRow,
convert_row_to_table)
from sunpy.util.exceptions import SunpyUserWarning from sunpy.util.exceptions import SunpyUserWarning
# from sunpy.net.attrs import Instrument, Level, Physobs, Provider, Time, Wavelength # from sunpy.net.attrs import Instrument, Level, Physobs, Provider, Time, Wavelength
import sdc.attrs as sattrs import sdc.attrs as sattrs
...@@ -77,11 +78,12 @@ def _str_val(value, regex=False): ...@@ -77,11 +78,12 @@ def _str_val(value, regex=False):
if isinstance(value, (int, float, complex)): if isinstance(value, (int, float, complex)):
return f"{value:g}" return f"{value:g}"
elif isinstance(value, Time): elif isinstance(value, Time):
# Date query format? - f"{{'$date':{int(value.unix * 1000)}}}" # Dates are stored as f"{{'$date':{int(value.unix * 1000)}}}" but queried as
return f"{{'$date':'{value.isot}'}}" return f"{{'$date':'{value.isot}'}}"
elif regex: elif regex:
return f"{{'$regex':'{str.lower(value)}'}}" return f"{{'$regex':'{str(value)}'}}"
else: else:
# Run through urllib.parse.quote() to be safe?
return f"'{str(value)}'" return f"'{str(value)}'"
...@@ -210,6 +212,16 @@ class KISClient(BaseClient): ...@@ -210,6 +212,16 @@ class KISClient(BaseClient):
def _status(cls): def _status(cls):
return cls.status return cls.status
@staticmethod
def _make_filename(row: QueryResponseRow, path: os.PathLike = None):
    """
    Generate a filename for a file based on the observation record; to be worked out.

    Parameters
    ----------
    row : QueryResponseRow
        Observation record from the search results; its ``_id`` field provides
        the fallback file name.
    path : os.PathLike, optional
        Directory to prepend to the generated name (default: no directory part).

    Returns
    -------
    str
        The generated (relative) file path.
    """
    # The fallback name is just the dataset _id.
    name = f"{row['_id']}.fits"
    # `os.PathLike` objects have no `append`, and `path` may be None;
    # join the directory part explicitly instead.
    return os.path.join(path, name) if path is not None else name
def _make_search(self, query_dicts): def _make_search(self, query_dicts):
""" """
Combine (AND) query parameters from list of params dictionaries to search paths. Combine (AND) query parameters from list of params dictionaries to search paths.
...@@ -269,6 +281,7 @@ class KISClient(BaseClient): ...@@ -269,6 +281,7 @@ class KISClient(BaseClient):
return QueryResponseTable(results, client=self) return QueryResponseTable(results, client=self)
@convert_row_to_table
def fetch(self, query_results: QueryResponseTable, *, downloader: parfive.Downloader, def fetch(self, query_results: QueryResponseTable, *, downloader: parfive.Downloader,
path: os.PathLike = None, binary: bool = False, **kwargs): path: os.PathLike = None, binary: bool = False, **kwargs):
""" """
...@@ -285,6 +298,24 @@ class KISClient(BaseClient): ...@@ -285,6 +298,24 @@ class KISClient(BaseClient):
binary : bool, optional binary : bool, optional
Fetch the binary data for the dataset (default: metadata). Fetch the binary data for the dataset (default: metadata).
""" """
# Nothing to enqueue for an empty result table.
if not len(query_results):
return
# Binary mode fetches the FITS data via the '/binary' endpoint;
# otherwise the JSON metadata document is fetched.
if binary:
binfile = '/binary'
ext = 'fits'
else:
binfile = ''
ext = 'json'
for row in query_results:
inst = row['description']['INSTRUMENT']
# One subdirectory per observation record, named by its ObjectId.
# NOTE(review): `path` may be None here — os.path.join would raise; confirm caller guarantees a path.
rowpath = os.path.join(path, row['_id']['$oid'])
# Enqueue one download per linked l1_data document of the record.
for l1_data in row['links']['l1_data']:
oid = l1_data['$oid']
filename = f"{oid}.{ext}"
url = f"{self._BASE_URL}{inst}_l1_data.files/{oid}{binfile}"
# max_splits=1: GridFS endpoint presumably does not support ranged/parallel requests — TODO confirm.
downloader.enqueue_file(url, filename=os.path.join(rowpath, filename), max_splits=1)
@classmethod @classmethod
def _can_handle_query(cls, *query, hasinstr=False): def _can_handle_query(cls, *query, hasinstr=False):
...@@ -368,13 +399,13 @@ class KISClient(BaseClient): ...@@ -368,13 +399,13 @@ class KISClient(BaseClient):
("BBI", "Broadband Context Imager @ Gregor"), ("BBI", "Broadband Context Imager @ Gregor"),
("GRIS", "Gregor Infrared Spectrograph"), ("GRIS", "Gregor Infrared Spectrograph"),
("M-lite", "M-lite 2M Imagers @ Gregor"), ("M-lite", "M-lite 2M Imagers @ Gregor"),
("ECHELLE", "Echelle grating Spectrograph @ VTT"),
("HELLRIDE", "HELioseismic Large Region Interferometric Device @ VTT"),
("TESOS", "TESOS/VIP 2D Fabry-Perot interferometric spectrometer @ VTT"),
("LARS", "Lars is an Absolute Reference Spectrograph @ VTT")], ("LARS", "Lars is an Absolute Reference Spectrograph @ VTT")],
# ("ECHELLE", "Echelle grating Spectrograph @ VTT"), # 404 error
# ("HELLRIDE", "HELioseismic Large Region Interferometric Device @ VTT"),
# ("TESOS", "TESOS/VIP 2D Fabry-Perot interferometric spectrometer @ VTT"),
# ("GFPI", "Gregor Fast Processes Imaging Spectrograph"), # ("GFPI", "Gregor Fast Processes Imaging Spectrograph"),
# ("HiFi", "High-resolution Fast Imager @ Gregor"), # 404 error # ("HiFi", "High-resolution Fast Imager @ Gregor"), # 404 error
# ("TIP-II", "Tenerife Infrared Polarimeter @ VTT"), # name? # ("TIP-II", "Tenerife Infrared Polarimeter @ VTT"), # name?
# ("ZIMPOL", "Zeeman Imaging Polarimeter @ Gregor"), # 404 error # ("ZIMPOL", "Zeeman Imaging Polarimeter @ Gregor"), # 404 error
# description.TELESCOPE - Name of the telescope # description.TELESCOPE - Name of the telescope
a.sdc.Telescope: [("ChroTel", "10 cm Chromospheric Telescope, Observatorio del Teide"), a.sdc.Telescope: [("ChroTel", "10 cm Chromospheric Telescope, Observatorio del Teide"),
......
...@@ -2,10 +2,13 @@ import pytest ...@@ -2,10 +2,13 @@ import pytest
import urllib.request import urllib.request
from urllib.error import HTTPError, URLError from urllib.error import HTTPError, URLError
import os
import json import json
import parfive
import astropy.units as u import astropy.units as u
from astropy.io import fits from astropy.io import fits
from astropy.time import Time
from sunpy.net import Fido from sunpy.net import Fido
from sunpy.net import attrs as a from sunpy.net import attrs as a
from sunpy.net.base_client import QueryResponseTable from sunpy.net.base_client import QueryResponseTable
...@@ -78,6 +81,50 @@ def test_docker(client): ...@@ -78,6 +81,50 @@ def test_docker(client):
assert '2014-04-26T' in hdulist[0].header.get('DATE-OBS') assert '2014-04-26T' in hdulist[0].header.get('DATE-OBS')
hdulist.close() hdulist.close()
date = a.Time("2014/04/26 01:00", "2014/04/26 22:00")
downloader = parfive.Downloader()
inst = res[0]['description']['INSTRUMENT']
rowpath = f"{res[0]['_id']['$oid']}"
binfile = ''
ext = 'json'
for i, l1_data in enumerate(res[0]['links']['l1_data'][:10]):
oid = l1_data['$oid']
filename = f"{oid}.{ext}"
url = f"{_BASE_URL}{inst}_l1_data.files/{oid}{binfile}"
assert url == f"{_BASE_URL}gris_l1_data.files/{file_ids[i]}"
downloader.enqueue_file(url, filename=os.path.join(rowpath, filename), max_splits=1)
binfile = '/binary'
ext = 'fits'
for l1_data in res[0]['links']['l1_data'][:2]:
oid = l1_data['$oid']
filename = f"{oid}.{ext}"
url = f"{_BASE_URL}{inst}_l1_data.files/{oid}{binfile}"
downloader.enqueue_file(url, filename=os.path.join(rowpath, filename), max_splits=1)
assert downloader.queued_downloads == 12
assert downloader.http_queue[0].keywords['url'].startswith(_BASE_URL)
assert res[0]['links']['l1_data'][0]['$oid'] in downloader.http_queue[0].keywords['url']
assert downloader.http_queue[10].keywords['url'].endswith(binfile)
files = downloader.download()
assert len(files) == 12
assert os.path.dirname(files[0]) == '5ee0feb97a92554c6de920ab'
for filepath in files:
if filepath.endswith('.fits'):
hdulist = fits.open(filepath)
assert hdulist[0].header.get('TELESCOP') == 'GREGOR'
assert '2014-04-26T' in hdulist[0].header.get('DATE-OBS')
assert date.start < Time(hdulist[0].header['DATE-OBS']) < date.end
hdulist.close()
else:
assert filepath.endswith('.json')
meta = json.load(open(filepath))
assert date.start < Time(meta['metadata']['header']['DATE-BEG']) < date.end
assert date.start < Time(meta['metadata']['header']['DATE-OBS']) < date.end
assert meta['_id']['$oid'] == os.path.splitext(os.path.basename(filepath))[0]
def test_gridfs(client): def test_gridfs(client):
"""Test gridfs access on dockertest.""" """Test gridfs access on dockertest."""
...@@ -193,10 +240,16 @@ def test_fido_search(): ...@@ -193,10 +240,16 @@ def test_fido_search():
theta = [obs['description']['THETA'] for obs in res['kis']] theta = [obs['description']['THETA'] for obs in res['kis']]
assert (min(theta) >= 50) & (max(theta) <= 80) assert (min(theta) >= 50) & (max(theta) <= 80)
date = a.Time("2017/05/12 01:40", "2017/05/16 19:00")
res = Fido.search(a.Instrument("LARS") & date)
assert len(res['kis']) > 0
date_beg = [obs['description']['DATE_BEG']['$date'] for obs in res['kis']]
date_end = [obs['description']['DATE_END']['$date'] for obs in res['kis']]
assert max(date_beg) < date.end.unix * 1000
assert min(date_end) > date.start.unix * 1000
res = Fido.search(two_inst, a.sdc.Theta(50*u.deg, 80*u.deg)) res = Fido.search(two_inst, a.sdc.Theta(50*u.deg, 80*u.deg))
assert len(res['kis']) == 2 assert len(res['kis']) == 2
theta = [obs['description']['THETA'] for obs in res['kis'][1]]
assert (min(theta) >= 50) & (max(theta) <= 80)
assert len(res['kis'][0]) == 100 assert len(res['kis'][0]) == 100
assert res['kis'][0][0]['description']['INSTRUMENT'] == 'lars' assert res['kis'][0][0]['description']['INSTRUMENT'] == 'lars'
theta = [obs['description']['THETA'] for obs in res['kis'][0]] theta = [obs['description']['THETA'] for obs in res['kis'][0]]
...@@ -204,6 +257,11 @@ def test_fido_search(): ...@@ -204,6 +257,11 @@ def test_fido_search():
assert res['kis'][1, 0]['description']['INSTRUMENT'] == 'gris' assert res['kis'][1, 0]['description']['INSTRUMENT'] == 'gris'
theta = [obs['description']['THETA'] for obs in res['kis'][1]] theta = [obs['description']['THETA'] for obs in res['kis'][1]]
assert (min(theta) >= 50) & (max(theta) <= 80) assert (min(theta) >= 50) & (max(theta) <= 80)
date = a.Time("2016/08/26 16:25", "2016/08/26 16:45")
res = Fido.search(a.Instrument("GRIS"), a.sdc.PolStates('iquv'), date)
assert len(res['kis']) == 1
assert res['kis'][0]['description']['POL_STATES'] == 'IQUV'
else: else:
with pytest.raises(URLError, match=rf"{_dockerexc('gris')}" with pytest.raises(URLError, match=rf"{_dockerexc('gris')}"
r"{'description.THETA':{'.gte':50,'.lte':80}}"): r"{'description.THETA':{'.gte':50,'.lte':80}}"):
...@@ -219,6 +277,108 @@ def test_fido_search(): ...@@ -219,6 +277,108 @@ def test_fido_search():
assert "{'description.INSTRUMENT':'gris'" not in str(exc.value) assert "{'description.INSTRUMENT':'gris'" not in str(exc.value)
def test_fido_fetch():
    """Test search and fetch using the Fido interface."""
    if not HAS_DOCKERTEST:
        pytest.xfail("No dockertest running")
    date = a.Time("2017/05/22 08:45", "2017/05/22 08:55")
    res = Fido.search(a.Instrument("BBI"), date)
    assert len(res['kis']) == 8

    # Default (metadata) fetch: one JSON document per linked l1_data file.
    files = Fido.fetch(res['kis'])
    assert len(files) == 45
    for filepath in files:
        # Use a context manager so the file handle is closed deterministically.
        with open(filepath) as fp:
            meta = json.load(fp)
        # Each file is named after the ObjectId of the document it contains.
        assert meta['_id']['$oid'] == os.path.splitext(os.path.basename(filepath))[0]
        assert date.start.isot[:12] in meta['metadata']['header']['DATE-BEG']
        assert date.start < Time(meta['metadata']['header']['DATE-BEG']) < date.end

    # Binary fetch for the first record only; files land in a per-record directory.
    files = Fido.fetch(res['kis'][:1], binary=True)
    assert os.path.dirname(files[0]).split(os.path.sep)[-1] == res['kis'][0]['_id']['$oid']
    assert len(files) == 10
    for filepath in files:
        # `with` ensures the HDUList is closed even if an assertion fails.
        with fits.open(filepath) as hdulist:
            assert hdulist[0].header.get('TELESCOP') == 'GREGOR'
            assert hdulist[0].header.get('INSTRUME') == 'BBI'
            assert date.start.iso[:12] in hdulist[0].header['DATE-OBS']
            assert date.start < Time(hdulist[0].header['DATE-OBS']) < date.end
def test_fido_fetch_2():
    """
    Test search and fetch from 2 instruments in time interval using the Fido interface.
    Assert observations are within some exposure times (10 min) of range.
    """
    date = a.Time("2016/08/26 16:25", "2016/08/26 16:26")
    if not HAS_DOCKERTEST:
        # Without the docker test service the query URL cannot be resolved;
        # assert the constructed query path appears in the error.
        with pytest.raises(URLError, match=rf"{_dockerexc('gris')}"
                           rf"{{'description.DATE_BEG':{{'.lte':{{'.date':'{date.end.isot}'}}}}}},"
                           rf"{{'description.DATE_END':{{'.gte':{{'.date':'{date.start.isot}'}}"):
            res = Fido.search((a.Instrument("GRIS") | a.Instrument("LARS")) & date)
    else:
        res = Fido.search((a.Instrument("GRIS") | a.Instrument("LARS")) & date)
        assert len(res['kis']) == 2
        assert len(res['kis'][0]) == 1
        # Stored dates are milliseconds since the epoch.
        assert res['kis'][0][0]['description']['DATE_BEG']['$date'] < date.end.unix * 1000
        assert res['kis'][0][0]['description']['DATE_END']['$date'] > date.start.unix * 1000
        files = Fido.fetch(res['kis'], binary=False)
        assert len(files) == 400
        assert files[0].endswith('.json')
        for filepath in files:
            # Context manager closes the file handle deterministically.
            with open(filepath) as fp:
                meta = json.load(fp)
            assert meta['_id']['$oid'] == os.path.splitext(os.path.basename(filepath))[0]
            assert date.start.isot[:12] in meta['metadata']['header']['DATE-OBS']
            assert date.start < Time(meta['metadata']['header']['DATE-OBS'])

    date = a.Time("2016/05/13 10:55", "2016/05/13 11:00")
    if not HAS_DOCKERTEST:
        with pytest.raises(URLError, match=rf"{_dockerexc('bbi')}"
                           rf"{{'description.DATE_BEG':{{'.lte':{{'.date':'{date.end.isot}'}}}}}},"
                           rf"{{'description.DATE_END':{{'.gte':{{'.date':'{date.start.isot}'}}"):
            res = Fido.search((a.Instrument("GRIS") | a.Instrument("LARS")) & date)
        return

    res = Fido.search((a.Instrument("GRIS") | a.Instrument("LARS")) & date)
    assert len(res['kis']) == 2
    assert len(res['kis'][1]) == 1
    date_beg = [obs['description']['DATE_BEG']['$date'] for obs in res['kis'][0]]
    date_end = [obs['description']['DATE_END']['$date'] for obs in res['kis'][0]]
    assert max(date_beg) < date.end.unix * 1000
    assert min(date_end) > date.start.unix * 1000
    date_beg = [obs['description']['DATE_BEG']['$date'] for obs in res['kis'][1]]
    date_end = [obs['description']['DATE_END']['$date'] for obs in res['kis'][1]]
    assert max(date_beg) < date.end.unix * 1000
    assert min(date_end) > date.start.unix * 1000

    files = Fido.fetch(res['kis'], binary=False)
    assert len(files.errors) == 0
    assert len(files) >= 1
    assert files[0].endswith('.json')
    for filepath in files:
        with open(filepath) as fp:
            meta = json.load(fp)
        assert meta['_id']['$oid'] == os.path.splitext(os.path.basename(filepath))[0]
        assert date.start.iso[:10] in meta['metadata']['header']['DATE-OBS']
        # NOTE(review): `.mjd` is in days, so `+ 600` allows ~1.6 years of slack;
        # the docstring says 10 min — should this be `+ 600 / 86400`? TODO confirm.
        assert Time(meta['metadata']['header']['DATE-OBS']).mjd < date.end.mjd + 600

    files = Fido.fetch(res['kis'], binary=True)
    assert files[0].endswith('.fits')
    for filepath in files:
        # `with` ensures the HDUList is closed even when an assertion fails.
        with fits.open(filepath) as hdulist:
            assert hdulist[0].header['TELESCOP'] in ('GREGOR', 'VTT')
            assert hdulist[0].header['INSTRUME'] in ('GRIS', 'LARS')
            assert date.start.iso[:10] in hdulist[0].header['DATE-OBS']
            # NOTE(review): same days-vs-seconds question as above for the 600 bound.
            assert Time(hdulist[0].header['DATE-OBS']).mjd < date.end.mjd + 600
            assert Time(hdulist[0].header['DATE-OBS']).mjd > date.start.mjd - 600
    assert len(files) == 301
@pytest.mark.parametrize("query", ((a.Instrument("GRIS") & a.Level(3)), @pytest.mark.parametrize("query", ((a.Instrument("GRIS") & a.Level(3)),
(a.Instrument("ChroTel") & a.Physobs("perspective.vortex")), (a.Instrument("ChroTel") & a.Physobs("perspective.vortex")),
(a.Level(0) & a.Instrument("Bob")), (a.Level(0) & a.Instrument("Bob")),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment