Commit 114e35a0 authored by Carl Schaffer
Browse files

Merge branch 'parsing_cleaning' into 'master'

Parsing cleaning

Closes #252

See merge request !145
parents 4ece3d7b 55191270
......@@ -10,35 +10,55 @@ from kis_tools.util.util import gris_run_number, date_from_fn
def parse_file(path):
Parse a GRIS calibration file. Capture all set variables as well as positional arguments for each call to any of
the calibration routines (gris_v6, gris_v7, gris_sp). Returns a list of dictionaries representing each call to
these routines.
path: path to the file
calls: list of dictionaries, each dictionary contains all necessary information for a call to one
of the calibration routines.
calls = []
env = {}
pattern = re.compile(r'(^[^=,]+)=(.*)')
with open(path, 'r') as infile:
pattern = re.compile(r"(^[^=,]+)=(.*)")
with open(path, "r") as infile:
for line in infile:
# remove comments
if ';' in line:
line = line[:line.find(';')]
if ";" in line:
line = line[: line.find(";")]
line = line.strip()
res =
if res:
varname, value = res.groups()
env[varname] = value.strip()
main_call_match ='^(gris_(?:v6|v7|sp))', line)
main_call_match ="^(gris_(?:v6|v7|sp))", line)
if main_call_match:
routine =
# extract keywords from call
keyword_options = re.findall(r'([^,]+)=([^, ]+)', line)
keywords = {keyword: value for keyword, value in keyword_options if value not in env}
keyword_options = re.findall(r"([^,]+)=([^, ]+)", line)
keywords = {
keyword: value
for keyword, value in keyword_options
if value not in env
# extract call flags
flags = ','.join(re.findall(r'(/[^,]+)', line))
current_env = {**env, **keywords, **{'main_routine': routine, 'boolean_keywords': flags}}
flags = ",".join(re.findall(r"(/[^,]+)", line))
current_env = {
**{"main_routine": routine, "boolean_keywords": flags},
# extract passed arguments
args = [arg for _, arg in re.findall(r"(?=(,([^/,=]+)[,$]))", line)]
positions = ['map', 'fileff', 'cal1']
positions = ["map", "fileff", "cal1"]
for a, right_name in zip(args, positions):
if a in current_env:
if a != right_name:
......@@ -48,40 +68,76 @@ def parse_file(path):
current_env[right_name] = a
# clean flatfield fields
ff: str = current_env['fileff']
ffs = re.findall(r'([^\[\],]+)',ff)
for ff, varname in zip(ffs, ['ff1', 'ff2']):
ff: str = current_env["fileff"]
ffs = re.findall(r"([^\[\],]+)", ff)
for ff, varname in zip(ffs, ["ff1", "ff2"]):
current_env[varname] = ff
del current_env['fileff']
del current_env["fileff"]
return calls
def get_gris_calib_scripts(path):
Recursively traverse a path to find IDL scripts for GRIS calibration. Matches files named either calib.pro or
cal<2 digits><3 word characters><2 digits>.pro; ignores runwise calfiles generated by grisred.
path: foldername or Path() object
files: generator for each file
p = Path(path)
pattern = r"^(?:calib|cal\d{2}\w{3}\d{2}).pro$"
files = (
idl_file for idl_file in p.rglob("*.pro") if re.match(pattern,
return files
def parse_path(path):
Parse IDL calibration routines for gris and return a DataFrame containing the settings for each call to
main calibration routines.
path: directory to traverse
call_df: Dataframe of calls
p = Path(path)
files = (idl_file for idl_file in p.rglob('*.pro') if
files = get_gris_calib_scripts(p)
calls = []
pb = tqdm(files)
for f in pb:
calls += parse_file(f)
if not calls:
raise ValueError(f"No gris calibration calls found in {path}")
call_df = DataFrame(calls)
call_df = call_df.drop_duplicates()
call_df['run'] = call_df['map'].apply(gris_run_number)
call_df['date'] = call_df['map'].apply(lambda x: date_from_fn(x).strftime('%Y-%m-%d'))
call_df["run"] = call_df["map"].apply(gris_run_number)
call_df["date"] = call_df["map"].apply(
lambda x: date_from_fn(x).strftime("%Y-%m-%d")
call_df['cal1'] = call_df.cal1.replace(r'[\[\]]', '', regex=True)
call_df["cal1"] = call_df.cal1.replace(r"[\[\]]", "", regex=True)
call_df.replace('',np.nan, inplace=True)
call_df.replace("", np.nan, inplace=True)
call_df = call_df.drop_duplicates()
assert ( == 1).all()
# Sort nicely
call_df = call_df.set_index(["date", "run"]).sort_index()
return call_df
if __name__ == '__main__':
if __name__ == "__main__":
calls = parse_path(sys.argv[1])
print('Use: calls.to_csv("path") to store the data.')
import os
import shutil
import unittest
from datetime import datetime
from glob import glob
from os.path import isdir, join
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch
import numpy as np
from importer_test_data import test_data, hmi_context
import pandas as pd
from importer_test_data import gris_folder
from importer_test_data.gris_structure import raw_files
from importer_test_data import test_data, hmi_context
from importer_test_data.gris_structure import (
folder as structured_gris_folder,
from kis_tools.gris.GrisArchive import GrisArchive
from kis_tools.gris.GrisDay import GrisDay
from kis_tools.gris.GrisFitsFile import GrisFitsFile
from kis_tools.gris.GrisRun import GrisRun
from kis_tools.gris.calib_settings.parsing import (
from kis_tools.gris.wcs import reorder_gris_data
from kis_tools.util.util import gris_run_number, groupby_gris_run
from kis_tools.util.util import groupby_gris_run
_testdata = "/dat/sdc/testing_data_for_importer/gris/"
......@@ -65,6 +72,28 @@ class TestGris(unittest.TestCase):
self.assertGreater(gd.count("*"), 0)
self.assertEqual(fakeday.count("*"), 0)
def test_calib_parsing(self):
Test the toolkit for parsing gris calibration settings files.
# Test get_files
files = [*get_gris_calib_scripts(structured_gris_folder)]
assert files
# test file_parsing
calls = parse_file(files[0])
assert calls
# test recursive parsing
call_df = parse_path(structured_gris_folder)
assert isinstance(call_df, pd.DataFrame)
# Try to parse an empty tempdir, ensure that an error is raised
with self.assertRaises(ValueError):
with TemporaryDirectory() as td:
def test_gris_fits_file(self):
"""test instantiation of GrisFitsFile """
gff = GrisFitsFile(self.filetarget)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment