check_gris_calib.py 15.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
"""
Author: Carl Schaffer Date: 2018 December 19th
Mail: schaffer@leibniz-kis.de

Scripts to generate a dataframe representing the calibration settings
used for calibrating the gris archive. The general idea is to find all
calibration files and represent each call to calibration routines
ocurring in these files as a line in a pandas DataFrame. each row in
the dataframe represents a variable within the idl environment when
calling the command.
11
12
13

"""

Carl Schaffer's avatar
Carl Schaffer committed
14
import datetime
Carl Schaffer's avatar
Carl Schaffer committed
15
import pickle
16
import re
Carl Schaffer's avatar
Carl Schaffer committed
17
from glob import glob
Carl Schaffer's avatar
Carl Schaffer committed
18
from os.path import basename, join, exists
Carl Schaffer's avatar
Carl Schaffer committed
19
20
21

import numpy as np
import pandas as pd
22
23
from kis_tools.util.util import get_fits_header_df
from kis_tools.util.util import gris_run_number, groupby_function
24

25

26
27
28
##################################################
# Processing of Calfiles
##################################################
29

30

Carl Schaffer's avatar
Carl Schaffer committed
31
def remove_comments(string, comment_sign=";"):
32
33
34
    """Take a string, and remove all comments, this is done by
    splitting the text into lines and then dropping all parts of each
    line that occur after the first comment character."""
Carl Schaffer's avatar
Carl Schaffer committed
35
    lines = string.split("\n")
36
37
38
    clean_lines = []
    for l in lines:
        clean_lines.append(l.split(comment_sign)[0])
Carl Schaffer's avatar
Carl Schaffer committed
39
    return "\n".join(clean_lines)
40
41
42


def process_calfile(fn):
43
44
    """loop over the lines of a gris calibration file and log the
    state of all defined variables as well as the command each time a
Carl Schaffer's avatar
Carl Schaffer committed
45
    command is called"""
46
47

    # open file
Carl Schaffer's avatar
Carl Schaffer committed
48
49
    with open(fn, "r") as infile:
        text = infile.read()
50
    # ignore all comments
51
52
53
    text = remove_comments(text)

    results = []
Carl Schaffer's avatar
Carl Schaffer committed
54
    data = {"filename": fn}
55
    # loop over lines
Carl Schaffer's avatar
Carl Schaffer committed
56
    for line in text.split("\n"):
57
58
59
        # check if line contains an idl command, commands are assumed
        # to be names of routines followed by a opening parenthesis or
        # a comma sign
Carl Schaffer's avatar
Carl Schaffer committed
60
61
        if re.search(r"^([^,=\(])+[,\(]", line):
            data["command"] = line.strip()
62
            results.append(data.copy())
63
64
65
        # if a line does not contain data, check if it contains any
        # variable assignments, if a variable is already defined it is
        # overwritten
66
        else:
Carl Schaffer's avatar
Carl Schaffer committed
67
            matches = re.findall(r"([a-zA-Z_]+)\s*=\s*([^\n]+)", line)
68
69
70
71
72
73
            for m in matches:
                data[m[0]] = m[1]
    return results


def find_commented_files(calfiles):
74
75
    """Generate a dataframe containing a list pf all files containing
    commented calibration routines"""
76
77
    df = pd.DataFrame()
    for c in calfiles:
Carl Schaffer's avatar
Carl Schaffer committed
78
        text = open(c, "r")
79
80
        commented = False
        for line in text:
Carl Schaffer's avatar
Carl Schaffer committed
81
            if re.search(r"[^\S\n]*;+[^\S\n]*gris[^\n]+,", line):
82
83
84
85
                commented = True
                print(c)
                print(line)
        if commented:
Carl Schaffer's avatar
Carl Schaffer committed
86
            df = df.append({"filename": c}, ignore_index=True)
87
88
89
    return df


Carl Schaffer's avatar
Carl Schaffer committed
90
91
def get_calfile_df(archive="/dat/sdc/gris/", pattern="*/cal[0-9][0-9]?????.pro"):
    """Construct a pandas DataFrame containing information on all
Carl Schaffer's avatar
changes    
Carl Schaffer committed
92
93
94
    calls to gris calibration routines. Each row in the dataframe
    represents a variable within the idl environment when calling the
    command.
95

Carl Schaffer's avatar
changes    
Carl Schaffer committed
96
97
98
    Args:
      archive: path to archive
      pattern: unix style pattern to select all files
99

Carl Schaffer's avatar
changes    
Carl Schaffer committed
100
101
    Returns:
      result: DataFrame ordered and cleaned representing the matched calfiles
102

Carl Schaffer's avatar
changes    
Carl Schaffer committed
103
    """
104
105

    # find all calfiles
Carl Schaffer's avatar
changes    
Carl Schaffer committed
106
    calfiles = glob(join(archive, pattern))
107
108
109
110
111
112
113
114
115
116

    # initialize empty dataframe and fill from calfiles
    df = pd.DataFrame()
    for c in calfiles:
        results = process_calfile(c)
        for r in results:
            df = df.append(r, ignore_index=True)

    # sometimes 'file' is used instead of map, fix this
    missing_map = df[df.map.isnull()]
Carl Schaffer's avatar
Carl Schaffer committed
117
    df.loc[missing_map.index, "map"] = df.loc[missing_map.index, "file"]
118
119

    # make index by cleaning map column and adding data and run columns
Carl Schaffer's avatar
Carl Schaffer committed
120
121
122
123
124
    df["map"] = clean_col(df["map"])
    df["runid"] = df.map.str.extract(r"(\d\d\w\w\w\d\d.\d\d\d)")
    df["run"] = pd.to_numeric(df["runid"].str.split(".").str.get(1))
    df["date"] = pd.to_datetime(df["runid"].str.split(".").str.get(0))
    df = df.set_index(["date", "run"])
125
126

    # separate flatfiled and calibration files into separate columns
Carl Schaffer's avatar
Carl Schaffer committed
127
128
    df[["ff1", "ff2"]] = split_file_list(df.fileff)
    df[["cal1", "cal2"]] = split_file_list(df.filecal)
129

130
    # clean dark column
Carl Schaffer's avatar
Carl Schaffer committed
131
    df["filedc"] = clean_col(df["filedc"].astype(str))
132
133
134

    # extract boolean keywords from command, boolean keywords are
    # preceded by a frontslash e.g. /xtalk
Carl Schaffer's avatar
Carl Schaffer committed
135
    df["boolean_keywords"] = np.nan
136
    for i, r in df.iterrows():
Carl Schaffer's avatar
Carl Schaffer committed
137
138
        c = r["command"]
        matches = re.findall(r"(/\w+)+", c)
Carl Schaffer's avatar
Carl Schaffer committed
139
        if matches:
Carl Schaffer's avatar
Carl Schaffer committed
140
            df.loc[i, "boolean_keywords"] = ",".join(matches)
141

Carl Schaffer's avatar
changes    
Carl Schaffer committed
142
    # generate column for called routine
Carl Schaffer's avatar
Carl Schaffer committed
143
    df["main_routine"] = extract_main_routine(df.command)
144

Carl Schaffer's avatar
changes    
Carl Schaffer committed
145
    # replace invalid values with nan
Carl Schaffer's avatar
Carl Schaffer committed
146
147
    df = df.replace("None", np.nan)
    df = df.replace("nan", np.nan)
Carl Schaffer's avatar
changes    
Carl Schaffer committed
148

149
    # extract value for keywords from command call
Carl Schaffer's avatar
Carl Schaffer committed
150
    keywords = ["lambda", "rotator_offset", "filedc", "xtau", "data", "pzero"]
151
    for k in keywords:
152
        df[k] = extract_kw_value_from_command(df, k)
153

154
    # select relevant columns
Carl Schaffer's avatar
Carl Schaffer committed
155
    columns = [
Carl Schaffer's avatar
Carl Schaffer committed
156
157
158
159
160
161
162
163
164
165
166
167
        "map",
        "main_routine",
        "ff1",
        "ff2",
        "cal1",
        "filedc",
        "lambda",
        "rotator_offset",
        "xtau",
        "data",
        "pzero",
        "boolean_keywords",
Carl Schaffer's avatar
Carl Schaffer committed
168
    ]
169
170

    result = df[columns].copy()
171
    result.sort_index(inplace=True)
Carl Schaffer's avatar
changes    
Carl Schaffer committed
172
173
    return result

Carl Schaffer's avatar
Carl Schaffer committed
174

Carl Schaffer's avatar
changes    
Carl Schaffer committed
175
176
177
178
179
180
181
##################################################
# Cleaning and ordering of results
##################################################


def extract_main_routine(column):
    """extract name of main routine in a command line"""
Carl Schaffer's avatar
Carl Schaffer committed
182
    return column.str.extract(r"(^[^,\(]+)[,\(]")
Carl Schaffer's avatar
changes    
Carl Schaffer committed
183
184
185
186
187
188


def clean_col(column):
    """make values in a column containing filenames consistent by
    removing all remaining list artefacts, trailing spaces, or
    unnecessary directory information"""
Carl Schaffer's avatar
Carl Schaffer committed
189
190
191
    column = column.str.replace("[", "")
    column = column.str.replace("]", "")
    column = column.str.replace("'", "")
192
    column = column.replace(np.nan, "")
Carl Schaffer's avatar
changes    
Carl Schaffer committed
193
194
195
196
197
198
199
200
    column = column.str.strip()
    column = column.apply(lambda x: basename(x) if x else np.nan)
    return column


def split_file_list(column):
    """transform columns containing a comma separated list into
    multiple columns, clean the columns afterwards"""
Carl Schaffer's avatar
Carl Schaffer committed
201
    df = column.str.split(",", expand=True)
Carl Schaffer's avatar
changes    
Carl Schaffer committed
202
203
204
205
206
207
    for c in df.columns:
        df[c] = clean_col(df[c].astype(str))
    return df


def extract_kw_value_from_command(df, kw):
Carl Schaffer's avatar
Carl Schaffer committed
208
209
    pat = r"{}\s*=\s*([\s\d,\.\[\]+-]+)[,$]".format(kw)
    padded = df["command"] + ","
Carl Schaffer's avatar
changes    
Carl Schaffer committed
210
211
212
213
214
215
216
    extracted = padded.str.extract(pat)[0]
    if kw in df.columns:
        extracted = extracted.astype(df[kw].dtype)
        extracted = extracted.combine_first(df[kw])
    return extracted


217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def get_best_ffs(runname, l0kws):
    """Get the two best ffs for a given gris run, only flatfields

    Procedure:
    1: Get all flats from same day
    2: Calculate the time difference to the measurement for each flat
    3: Split into 'before' and 'after' sets
    4: Try to successively find the closest flat in each of these categories:
       a: parameters match and delta_t < 2
       b: parameters don't match and delta_t < 2
       c: parameters match and same day
       d: parameters don't match and same day

    Args:
      runname : run identifier formatted as "28may19.001"
      l0kws   : pandas DataFrame containing FITS headers for level0 files

    Returns:
      results : list of flatfields either empty one or two elements
    """
    print(runname)
Carl Schaffer's avatar
Carl Schaffer committed
238
    verbname, runnumber = re.search(r"(\d+\w+\d+)\.(\d\d\d)", runname).groups()
239
240
241
242
    date = pd.to_datetime(verbname).date()
    runnumber = int(runnumber)

    # get all entries matching the day
Carl Schaffer's avatar
Carl Schaffer committed
243
244
245
    day = l0kws[str(date)][
        ["date", "run", "ACCUMULA", "EXPTIME", "MEASURE", "FILENAME"]
    ].copy()
246
247
248
249
250
251

    run = day[day.run == runnumber]
    if run.empty:
        return []

    # select all flatfield measurements
Carl Schaffer's avatar
Carl Schaffer committed
252
    candidates = day[day.MEASURE == "flat field"]
253
254
255
256
    if candidates.empty:
        return []

    # calculate time_differences in hours
Carl Schaffer's avatar
Carl Schaffer committed
257
    candidates["delta_t"] = candidates.FILENAME.apply(lambda x: get_time(x, l0kws))
Carl Schaffer's avatar
Carl Schaffer committed
258
259
260
261
    candidates.delta_t = candidates.delta_t - get_time(
        f"{verbname}.{runnumber:03d}", l0kws
    )
    candidates.delta_t = candidates.delta_t.dt.total_seconds() / (60 * 60)
262
263

    # check if parameters match:
Carl Schaffer's avatar
Carl Schaffer committed
264
    pars_to_check = ["ACCUMULA", "EXPTIME"]
265
    try:
Carl Schaffer's avatar
Carl Schaffer committed
266
        candidates["matching_pars"] = (
Carl Schaffer's avatar
Carl Schaffer committed
267
            run[pars_to_check].values == candidates[pars_to_check]
Carl Schaffer's avatar
Carl Schaffer committed
268
        ).sum(axis=1)
269
270
    except:
        print(runname)
Carl Schaffer's avatar
Carl Schaffer committed
271
        return []
272
273
274
275
276
277
278
279
280
281

    # split into before and after:
    before = candidates[np.sign(candidates.delta_t) == -1.0]
    after = candidates[np.sign(candidates.delta_t) == 1.0]

    hits = []
    for flat_candidates in [before, after]:
        if flat_candidates.empty:
            continue

Carl Schaffer's avatar
Carl Schaffer committed
282
        lt_2 = np.abs(flat_candidates.delta_t) <= 2
Carl Schaffer's avatar
Carl Schaffer committed
283
        match_pars = flat_candidates.matching_pars == len(pars_to_check)
284

285
286
287
288
        combinations = [
            match_pars & lt_2,
            lt_2,
            match_pars,
Carl Schaffer's avatar
bugfix    
Carl Schaffer committed
289
            np.full((len(flat_candidates)), True, dtype=bool),
290
        ]
291
292
293
294

        for c in combinations:
            if c.sum() == 0:
                continue
295
            flat_candidates.delta_t = np.abs(flat_candidates.delta_t)
Carl Schaffer's avatar
Carl Schaffer committed
296
297
            matches = flat_candidates[c].sort_values("delta_t")
            hits.append(matches.iloc[0]["run"])
298
299
300
            break

    # make verbose runname as result
Carl Schaffer's avatar
Carl Schaffer committed
301
    res = [f"{verbname}.{int(h):03d}" for h in hits]
302
303
304
305
306
    return res


def get_time(runname, l0kws):
    try:
Carl Schaffer's avatar
Carl Schaffer committed
307
        verbname, runnumber = runname.split(".")
308
309
310
    except:
        return np.nan
    date = pd.to_datetime(verbname).date()
Carl Schaffer's avatar
Carl Schaffer committed
311
312
313
314
    runnumber = int(runnumber.split("-")[0])
    day = l0kws[str(date)][
        ["date", "run", "ACCUMULA", "EXPTIME", "MEASURE", "DATE-OBS", "UTIME"]
    ]
315
316
317
318
319
    run = day[day.run == runnumber]

    if run.empty:
        return np.nan
    else:
Carl Schaffer's avatar
Carl Schaffer committed
320
        time = run.iloc[0]["UTIME"]
321
322
323
        time = pd.to_datetime(time).time()
        dt = datetime.datetime.combine(date, time)
        return dt
Carl Schaffer's avatar
changes    
Carl Schaffer committed
324
325


326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
def append_command(command, col):
    """append a command to a column, chacks if each field contains a
    value, and either appends separated by a comma or sets the value
    if none is set

    pars:
      command: string to be appended
      col: pd.Series containing the values

    returns:
      col: modified column
    """
    col = col.copy()
    for i, v in col.items():
        if v == v:  # check for nan
            v = ",".join([str(v), command])
        else:
            v = command
        col[i] = v
    return col


348
def pars_match(run_a, run_b, l0kws):
349
350
351
352
353
354
355
356
    """Check if the parameters for two given runs match

    pars:
      run_a, run_b: gris run identifiers DDMMMYY.run such as 24apr14.000
      l0kws: pandas DataFrame containing level0 keywords
    returns:
      result: boolean
    """
357
358
359
360
361
362
363
364
365
366
367
368
    pars = []
    for r in [run_a, run_b]:
        verbname, runnumber = r.split(".")
        date = str(pd.to_datetime(verbname).date())
        runnumber = int(runnumber)
        day = l0kws[date]
        run = day[day.run == runnumber]
        pars.append(run[["EXPTIME"]].values.tolist())
    result = pars[0] == pars[1]
    return result


Carl Schaffer's avatar
changes    
Carl Schaffer committed
369
370
371
372
373
##################################################
# Main block
##################################################


Carl Schaffer's avatar
Carl Schaffer committed
374
375
def get_l0_runlist(archive, force_update=False, storage_path="/dat/schaffer/data/"):
    """Get a list of all data files contained in the */level0/
Carl Schaffer's avatar
Carl Schaffer committed
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
    subfolders, log, cal and textfiles are ignored"""

    fn = "gris_l0_runs_" + archive.replace("/", "_") + ".pkl"
    fn = join(storage_path, fn)
    if exists(fn) and not force_update:
        print(fn)
        with open(fn, "rb") as cache_file:
            l0_runlist = pickle.load(cache_file)
        return l0_runlist
    else:
        runpattern = r"(\d{2}\w{3}\d{2}\.\d{3})"
        pattern = join(archive, "*/level0/*")
        files = glob(pattern)
        # filter files:
        files = list(map(basename, files))
Carl Schaffer's avatar
Carl Schaffer committed
391
        files = list(filter(lambda x: not x[-3:] in ["log", "txt", "cal"], files))
Carl Schaffer's avatar
Carl Schaffer committed
392
393
        files = list(filter(lambda x: re.search(runpattern, x), files))

Carl Schaffer's avatar
Carl Schaffer committed
394
        l0_runlist = list(set([re.search(runpattern, r).group(0) for r in files]))
Carl Schaffer's avatar
Carl Schaffer committed
395
396
397
398
399
        with open(fn, "wb+") as cache_file:
            pickle.dump(l0_runlist, cache_file)
        return l0_runlist


Carl Schaffer's avatar
changes    
Carl Schaffer committed
400
401
if __name__ == "__main__":

Carl Schaffer's avatar
Carl Schaffer committed
402
    archive = "/dat/sdc/gris/"
403
    # get dataframe from calibration files
Carl Schaffer's avatar
changes    
Carl Schaffer committed
404
    result = get_calfile_df(archive=archive)
405

406
407
408
409
410
411
    df2 = result.sort_index()
    new = result.loc["2019-09-01"]
    new.boolean_keywords = new.boolean_keywords.fillna("") + "/lazy/quiet"
    #    with open("/dat/schaffer/projects/grisred/gris_calibration_settings.csv", "a+") as out:
    #        new.to_csv(out, header=False)

Carl Schaffer's avatar
changes    
Carl Schaffer committed
412
    # check which maps are present:
Carl Schaffer's avatar
Carl Schaffer committed
413
    by_run = groupby_function(
Carl Schaffer's avatar
Carl Schaffer committed
414
415
416
        sorted(get_l0_runlist("/dat/sdc/gris/", force_update=True)),
        gris_run_number,
    )
Carl Schaffer's avatar
Carl Schaffer committed
417
    runfiles = [a[0] for _, a in by_run.items()]
418
419

    # remove invalid or obsolete keywords from boolean_keywords column
420
    for invalid_option in ["/filt", "/xtalk", "/show"]:
421
422
423
424
425
426
        result.boolean_keywords = result.boolean_keywords.str.replace(
            f"{invalid_option},", ""
        )
        result.boolean_keywords = result.boolean_keywords.str.replace(
            f",{invalid_option}", ""
        )
427
428
429
430
431
        result.boolean_keywords = result.boolean_keywords.replace(
            f"{invalid_option}", np.nan
        )

    # ensure that all maps use the correct calibration routine
Carl Schaffer's avatar
Carl Schaffer committed
432
    result["main_routine"] = result.main_routine.replace("gris_sp_v5", "gris_sp")
433
434
    result["main_routine"] = result.main_routine.replace("gris", "gris_v6")
    result["main_routine"] = result.main_routine.replace("gris_v5", "gris_v6")
435
436

    # replace empty strings with nan
Carl Schaffer's avatar
Carl Schaffer committed
437
    result["boolean_keywords"] = result[["boolean_keywords"]].replace("", np.nan)
438

439
440
441
442
443
    # add lazy and quiet keywords
    result["boolean_keywords"] = append_command(
        "/lazy,/quiet", result["boolean_keywords"]
    )

Carl Schaffer's avatar
Carl Schaffer committed
444
    l0kws = get_fits_header_df(runfiles, "/dat/schaffer/data/l0_keywords.pkl")
Carl Schaffer's avatar
changes    
Carl Schaffer committed
445
446

    l0kws = l0kws.reset_index()
Carl Schaffer's avatar
Carl Schaffer committed
447
448
    l0kws["datetime"] = pd.to_datetime(
        l0kws.date.astype(str).str.cat(l0kws.UT, sep=" ")
Carl Schaffer's avatar
Carl Schaffer committed
449
    )
Carl Schaffer's avatar
Carl Schaffer committed
450
    l0kws = l0kws.set_index("datetime")
451

Carl Schaffer's avatar
Carl Schaffer committed
452
453
454
    flats_rec = pd.read_pickle("recommended_flatfields.pkl")
    ffstudy = result[["map", "ff1", "ff2"]]
    ffstudy[["ff1_rec", "ff2_rec"]] = flats_rec
455
456
    fs = ffstudy

Carl Schaffer's avatar
Carl Schaffer committed
457
    for c in ["map", "ff1", "ff2", "ff1_rec", "ff2_rec"]:
458
        print(c)
Carl Schaffer's avatar
Carl Schaffer committed
459
        ffstudy[c + "_time"] = ffstudy[c].apply(lambda x: get_time(x, l0kws))
460

Carl Schaffer's avatar
Carl Schaffer committed
461
    for c in ["ff1_time", "ff2_time", "ff1_rec_time", "ff2_rec_time"]:
Carl Schaffer's avatar
Carl Schaffer committed
462
463
464
        ffstudy[c + "diff"] = (ffstudy["map_time"] - ffstudy[c]).dt.total_seconds() / (
            60 * 60
        )
465
466
    no_f2 = ffstudy[ffstudy.ff2.isnull()]
    matches = (no_f2.ff1 == no_f2.ff1_rec) | (no_f2.ff1 == no_f2.ff2_rec)
Carl Schaffer's avatar
Carl Schaffer committed
467
468
    spec = l0kws[l0kws["STATES"] == 1][["date", "run", "FILENAME"]]
    spec_runs = spec["FILENAME"].str.extract(r"(\d\d\w\w\w\d\d.\d\d\d)")
469
    result[result.map.isin(spec_runs.values.flatten())].command
470
471
472
473
474
475
476
477

    # check calibration file parameter matching
    for i, row in result.iterrows():
        if "gris_sp" in row["main_routine"]:
            res = True
        else:
            res = pars_match(row["map"], row["cal1"], l0kws)
        result.loc[i, "cal_match"] = res