parsing.py 5.63 KB
Newer Older
Carl Schaffer's avatar
Carl Schaffer committed
1
import os
2
3
import re
import sys
Carl Schaffer's avatar
Carl Schaffer committed
4
from os.path import join
5
6
from pathlib import Path

7
import numpy as np
8
from kis_tools.util.util import gris_run_number, date_from_fn
9
10
11
12
13
from pandas import DataFrame
from tqdm import tqdm


def parse_file(path):
    """
    Parse a GRIS calibration file. Capture all set variables as well as positional arguments for each call to any of
    the calibration routines (gris, gris_v6, gris_v7, gris_sp, ...). Returns a list of dictionaries representing each
    call to these routines.

    The file is read line by line. Everything after a ';' is discarded as a comment. Lines of the form
    ``name = value`` are stored as variables (values are kept as the raw text found in the file). A line starting
    with a calibration routine followed by a comma is treated as a call: its ``keyword=value`` pairs, its ``/flag``
    switches and up to three positional arguments (stored as ``map``, ``fileff`` and ``cal1``) are merged with all
    variables seen so far. The ``fileff`` entry is finally split into ``ff1`` and ``ff2``.

    Args:
        path: path to the file

    Returns:
        calls: list of dictionaries, each dictionary contains all necessary information for a call to one
        of the calibration routines.

    """
    # All patterns are loop invariant, compile them once per file.
    assignment_pattern = re.compile(r"(^[^=,]+)=(.*)")
    call_pattern = re.compile(r"^(gris(?:_v[0-9]+|_sp){0,1})\s*,")
    keyword_pattern = re.compile(r"([^,]+)=([^, ]+)")
    flag_pattern = re.compile(r"(/[^,]+)")
    # A positional argument follows a comma, contains no '/', ',', '=' or '$' and is terminated by a comma, a
    # literal '$' or the end of the line. The end of the line is only accepted when the token does not close a
    # bracket, otherwise the tail of a list literal such as "xtau=[1,2]" would be taken for an argument. The
    # lookahead allows neighbouring arguments to share their separating comma.
    argument_pattern = re.compile(r"(?=,\s*([^/,=$]+)\s*(?:[,$]|(?<!\])$))")
    flatfield_pattern = re.compile(r"([^\[\],]+)")
    positions = ["map", "fileff", "cal1"]

    calls = []
    env = {}
    with open(path, "r") as infile:
        for line in infile:
            # remove comments
            line = line.partition(";")[0].strip()

            assignment = assignment_pattern.search(line)
            if assignment:
                varname, value = assignment.groups()
                # strip the name as well, "map = x" must be stored as "map" to be found when it is passed to a call
                env[varname.strip()] = value.strip()

            call_match = call_pattern.search(line)
            if not call_match:
                continue
            routine = call_match.group(1)

            # extract keywords from call, keywords which only pass on a known variable are skipped
            keywords = {
                keyword.strip(): value
                for keyword, value in keyword_pattern.findall(line)
                if value not in env
            }

            # extract call flags
            flags = ",".join(flag_pattern.findall(line))
            current_env = {
                **env,
                **keywords,
                "main_routine": routine,
                "boolean_keywords": flags,
            }

            # extract passed arguments and store them under the name of their position
            args = [arg.strip() for arg in argument_pattern.findall(line)]
            for arg, right_name in zip(args, positions):
                if arg in current_env:
                    if arg != right_name:
                        # argument is a variable, move its value to the positional name
                        current_env[right_name] = current_env.pop(arg)
                else:
                    # argument is not a known variable, keep it as it was written
                    current_env[right_name] = arg

            # clean flatfield fields, calls without a flatfield entry simply get no ff1/ff2
            flatfields = current_env.pop("fileff", "")
            for varname, flatfield in zip(["ff1", "ff2"], flatfield_pattern.findall(flatfields)):
                current_env[varname] = flatfield

            calls.append(current_env)

    return calls

82

Carl Schaffer's avatar
Carl Schaffer committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
def get_gris_calib_scripts(path):
    """
    Find the IDL scripts used for GRIS calibration below a directory. The directory is searched recursively for
    '.pro' files, a file is reported if its name is either calib.pro or follows the cal24may14.pro scheme. The
    runwise calfiles written by grisred do not follow either scheme and are therefore left out.

    Args:
        path: foldername or Path() object

    Returns:
        files: generator yielding a Path() object for each matching file
    """
    name_matcher = re.compile(r"^(?:calib|cal\d{2}\w{3}\d{2}).pro$")
    candidates = Path(path).rglob("*.pro")
    return (candidate for candidate in candidates if name_matcher.match(candidate.name))


102
def parse_path(path):
    """
    Parse IDL calibration routines for gris and return a DataFrame containing the settings for each call to
    main calibration routines.

    All calibration scripts found by get_gris_calib_scripts below path are parsed with parse_file. The resulting
    calls are combined into one table, duplicate rows are removed, a 'run' and a 'date' column are derived from the
    'map' entry of each call (via gris_run_number and date_from_fn) and the table is indexed and sorted by
    ('date', 'run'). Empty strings are replaced by NaN.

    Args:
        path: directory to traverse

    Returns:
        call_df: Dataframe of calls

    Raises:
        ValueError: if no call to a calibration routine was found below path
        KeyError: if none of the parsed calls has a 'map' entry
    """
    files = [*get_gris_calib_scripts(Path(path))]

    calls = []
    for f in tqdm(files):
        calls += parse_file(f)

    if not calls:
        raise ValueError(f"No gris calibration calls found in {path}")

    call_df = DataFrame(calls)
    call_df = call_df.drop_duplicates()

    call_df["run"] = call_df["map"].apply(gris_run_number)
    call_df["date"] = call_df["map"].apply(
        lambda x: date_from_fn(x).strftime("%Y-%m-%d")
    )

    # The cal1 column only exists if at least one call provided a value for it, do not fail if it is missing.
    if "cal1" in call_df.columns:
        # remove brackets around the cal1 entry
        call_df["cal1"] = call_df["cal1"].replace(r"[\[\]]", "", regex=True)

    call_df.replace("", np.nan, inplace=True)
    # cleaning the values may have produced new duplicates
    call_df = call_df.drop_duplicates()

    # Sort nicely
    call_df = call_df.set_index(["date", "run"]).sort_index()
    return call_df

140

Carl Schaffer's avatar
Carl Schaffer committed
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def parse_archive(path, outfolder=None):
    """
    Parse calibration settings from IDL routine descriptions.
     Recursively goes through all calDDMMMYY.pro files and pulls
     the configuration of the gris pipeline

     Saves the result as 'parsed_calibration_settings.csv' in outfolder.
    Args:
        path: top level directory
        outfolder: directory the .csv file is written to, defaults to the working directory at the time of
            the call

    Returns:
        None, the parsed settings are written to disk and the location of the file is printed

    Raises:
        ValueError: if no folder below path could be parsed

    """
    # DataFrame.append is not available in current pandas releases, concat is imported locally to keep this
    # function independent of the imports at the top of the file.
    from pandas import concat

    # Resolve the default at call time, a default of os.getcwd() would be frozen when the module is imported.
    if outfolder is None:
        outfolder = os.getcwd()

    # find all calibration .pro files under path, paths containing '_run' are skipped
    target = Path(path)
    files = [f for f in target.rglob('cal???????.pro') if '_run' not in str(f)]
    # each folder is parsed only once, sorting keeps the progress output reproducible
    folders = sorted({f.parent for f in files})
    dfs = []
    for folder in folders:
        print(folder)
        try:
            dfs.append(parse_path(str(folder)))
        except Exception as err:
            # best effort: report folders which can not be parsed and continue with the remaining ones
            print(f"error: {err!r}")

    if not dfs:
        raise ValueError(f"No calibration settings could be parsed below {path}")

    full = concat(dfs)
    full.columns = [c.strip() for c in full.columns]
    # turn the (date, run) index of the individual tables back into columns
    full = full.reset_index()
    # only the columns listed here are written, in this order, columns missing in the data are skipped
    target_order = [c for c in ['date', 'run', 'map', 'main_routine', 'ff1', 'ff2', 'cal1', 'filedc',
                                'lambda', 'rotator_offset', 'xtau', 'data', 'pzero',
                                'boolean_keywords'] if c in full.columns]
    full = full[target_order]
    outfile = join(outfolder, 'parsed_calibration_settings.csv')
    full.sort_values(['date', 'run']).to_csv(outfile, index=False)
    print(f"Wrote parsed calibration to {str(Path(outfile).absolute())}")


Carl Schaffer's avatar
Carl Schaffer committed
178
if __name__ == "__main__":
    # Command line use: parse all calibration scripts below the directory given as first argument and print
    # the resulting table. The table is kept in 'calls' so it can be stored from an interactive session.
    if len(sys.argv) < 2:
        # exit with a usage message instead of an IndexError if the directory is missing
        sys.exit(f"Usage: {sys.argv[0]} <directory containing GRIS calibration scripts>")
    calls = parse_path(sys.argv[1])
    print('Use: calls.to_csv("path") to store the data.')
    print(calls)