gris_calib.py 15.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
"""
Author: Carl Schaffer Date: 2018 December 19th
Mail: schaffer@leibniz-kis.de

Scripts to generate a dataframe representing the calibration settings
used for calibrating the gris archive. The general idea is to find all
calibration files and represent each call to calibration routines
ocurring in these files as a line in a pandas DataFrame. each row in
the dataframe represents a variable within the idl environment when
calling the command.
11
12
13

"""

Carl Schaffer's avatar
Carl Schaffer committed
14
import datetime
Carl Schaffer's avatar
Carl Schaffer committed
15
import pickle
16
import re
Carl Schaffer's avatar
Carl Schaffer committed
17
from glob import glob
Carl Schaffer's avatar
Carl Schaffer committed
18
from os.path import basename, join, exists
Carl Schaffer's avatar
Carl Schaffer committed
19
20
21

import numpy as np
import pandas as pd
22
23
from kis_tools.util.util import get_fits_header_df
from kis_tools.util.util import gris_run_number, groupby_function
24

25

26
27
28
##################################################
# Processing of Calfiles
##################################################
29

30

Carl Schaffer's avatar
Carl Schaffer committed
31
def remove_comments(string, comment_sign=";"):
32
33
34
    """Take a string, and remove all comments, this is done by
    splitting the text into lines and then dropping all parts of each
    line that occur after the first comment character."""
Carl Schaffer's avatar
Carl Schaffer committed
35
    lines = string.split("\n")
36
37
38
    clean_lines = []
    for l in lines:
        clean_lines.append(l.split(comment_sign)[0])
Carl Schaffer's avatar
Carl Schaffer committed
39
    return "\n".join(clean_lines)
40
41
42


def process_calfile(fn):
43
44
45
46
47
    """loop over the lines of a gris calibration file and log the
    state of all defined variables as well as the command each time a
    command is called """

    # open file
Carl Schaffer's avatar
Carl Schaffer committed
48
49
    with open(fn, "r") as infile:
        text = infile.read()
50
    # ignore all comments
51
52
53
    text = remove_comments(text)

    results = []
Carl Schaffer's avatar
Carl Schaffer committed
54
    data = {"filename": fn}
55
    # loop over lines
Carl Schaffer's avatar
Carl Schaffer committed
56
    for line in text.split("\n"):
57
58
59
        # check if line contains an idl command, commands are assumed
        # to be names of routines followed by a opening parenthesis or
        # a comma sign
Carl Schaffer's avatar
Carl Schaffer committed
60
61
        if re.search(r"^([^,=\(])+[,\(]", line):
            data["command"] = line.strip()
62
            results.append(data.copy())
63
64
65
        # if a line does not contain data, check if it contains any
        # variable assignments, if a variable is already defined it is
        # overwritten
66
        else:
Carl Schaffer's avatar
Carl Schaffer committed
67
            matches = re.findall(r"([a-zA-Z_]+)\s*=\s*([^\n]+)", line)
68
69
70
71
72
73
            for m in matches:
                data[m[0]] = m[1]
    return results


def find_commented_files(calfiles):
74
75
    """Generate a dataframe containing a list pf all files containing
    commented calibration routines"""
76
77
    df = pd.DataFrame()
    for c in calfiles:
Carl Schaffer's avatar
Carl Schaffer committed
78
        text = open(c, "r")
79
80
        commented = False
        for line in text:
Carl Schaffer's avatar
Carl Schaffer committed
81
            if re.search(r"[^\S\n]*;+[^\S\n]*gris[^\n]+,", line):
82
83
84
85
                commented = True
                print(c)
                print(line)
        if commented:
Carl Schaffer's avatar
Carl Schaffer committed
86
            df = df.append({"filename": c}, ignore_index=True)
87
88
89
    return df


90
def get_calfile_df(
Carl Schaffer's avatar
Carl Schaffer committed
91
        archive="/dat/sdc/gris/", pattern="*/cal[0-9][0-9]?????.pro"
92
):
Carl Schaffer's avatar
Carl Schaffer committed
93
    """ Construct a pandas DataFrame containing information on all
Carl Schaffer's avatar
changes    
Carl Schaffer committed
94
95
96
    calls to gris calibration routines. Each row in the dataframe
    represents a variable within the idl environment when calling the
    command.
97

Carl Schaffer's avatar
changes    
Carl Schaffer committed
98
99
100
    Args:
      archive: path to archive
      pattern: unix style pattern to select all files
101

Carl Schaffer's avatar
changes    
Carl Schaffer committed
102
103
    Returns:
      result: DataFrame ordered and cleaned representing the matched calfiles
104

Carl Schaffer's avatar
changes    
Carl Schaffer committed
105
    """
106
107

    # find all calfiles
Carl Schaffer's avatar
changes    
Carl Schaffer committed
108
    calfiles = glob(join(archive, pattern))
109
110
111
112
113
114
115
116
117
118

    # initialize empty dataframe and fill from calfiles
    df = pd.DataFrame()
    for c in calfiles:
        results = process_calfile(c)
        for r in results:
            df = df.append(r, ignore_index=True)

    # sometimes 'file' is used instead of map, fix this
    missing_map = df[df.map.isnull()]
Carl Schaffer's avatar
Carl Schaffer committed
119
    df.loc[missing_map.index, "map"] = df.loc[missing_map.index, "file"]
120
121

    # make index by cleaning map column and adding data and run columns
Carl Schaffer's avatar
Carl Schaffer committed
122
123
124
125
126
    df["map"] = clean_col(df["map"])
    df["runid"] = df.map.str.extract(r"(\d\d\w\w\w\d\d.\d\d\d)")
    df["run"] = pd.to_numeric(df["runid"].str.split(".").str.get(1))
    df["date"] = pd.to_datetime(df["runid"].str.split(".").str.get(0))
    df = df.set_index(["date", "run"])
127
128

    # separate flatfiled and calibration files into separate columns
Carl Schaffer's avatar
Carl Schaffer committed
129
130
    df[["ff1", "ff2"]] = split_file_list(df.fileff)
    df[["cal1", "cal2"]] = split_file_list(df.filecal)
131

132
    # clean dark column
Carl Schaffer's avatar
Carl Schaffer committed
133
    df["filedc"] = clean_col(df["filedc"].astype(str))
134
135
136

    # extract boolean keywords from command, boolean keywords are
    # preceded by a frontslash e.g. /xtalk
Carl Schaffer's avatar
Carl Schaffer committed
137
    df["boolean_keywords"] = np.nan
138
    for i, r in df.iterrows():
Carl Schaffer's avatar
Carl Schaffer committed
139
140
        c = r["command"]
        matches = re.findall(r"(/\w+)+", c)
Carl Schaffer's avatar
Carl Schaffer committed
141
        if matches:
Carl Schaffer's avatar
Carl Schaffer committed
142
            df.loc[i, "boolean_keywords"] = ",".join(matches)
143

Carl Schaffer's avatar
changes    
Carl Schaffer committed
144
    # generate column for called routine
Carl Schaffer's avatar
Carl Schaffer committed
145
    df["main_routine"] = extract_main_routine(df.command)
146

Carl Schaffer's avatar
changes    
Carl Schaffer committed
147
    # replace invalid values with nan
Carl Schaffer's avatar
Carl Schaffer committed
148
149
    df = df.replace("None", np.nan)
    df = df.replace("nan", np.nan)
Carl Schaffer's avatar
changes    
Carl Schaffer committed
150

151
    # extract value for keywords from command call
Carl Schaffer's avatar
Carl Schaffer committed
152
    keywords = ["lambda", "rotator_offset", "filedc", "xtau", "data", "pzero"]
153
    for k in keywords:
154
        df[k] = extract_kw_value_from_command(df, k)
155

156
    # select relevant columns
Carl Schaffer's avatar
Carl Schaffer committed
157
    columns = [
Carl Schaffer's avatar
Carl Schaffer committed
158
159
160
161
162
163
164
165
166
167
168
169
        "map",
        "main_routine",
        "ff1",
        "ff2",
        "cal1",
        "filedc",
        "lambda",
        "rotator_offset",
        "xtau",
        "data",
        "pzero",
        "boolean_keywords",
Carl Schaffer's avatar
Carl Schaffer committed
170
    ]
171
172

    result = df[columns].copy()
173
    result.sort_index(inplace=True)
Carl Schaffer's avatar
changes    
Carl Schaffer committed
174
175
    return result

Carl Schaffer's avatar
Carl Schaffer committed
176

Carl Schaffer's avatar
changes    
Carl Schaffer committed
177
178
179
180
181
182
183
##################################################
# Cleaning and ordering of results
##################################################


def extract_main_routine(column):
    """extract name of main routine in a command line"""
Carl Schaffer's avatar
Carl Schaffer committed
184
    return column.str.extract(r"(^[^,\(]+)[,\(]")
Carl Schaffer's avatar
changes    
Carl Schaffer committed
185
186
187
188
189
190


def clean_col(column):
    """make values in a column containing filenames consistent by
    removing all remaining list artefacts, trailing spaces, or
    unnecessary directory information"""
Carl Schaffer's avatar
Carl Schaffer committed
191
192
193
    column = column.str.replace("[", "")
    column = column.str.replace("]", "")
    column = column.str.replace("'", "")
194
    column = column.replace(np.nan, "")
Carl Schaffer's avatar
changes    
Carl Schaffer committed
195
196
197
198
199
200
201
202
    column = column.str.strip()
    column = column.apply(lambda x: basename(x) if x else np.nan)
    return column


def split_file_list(column):
    """transform columns containing a comma separated list into
    multiple columns, clean the columns afterwards"""
Carl Schaffer's avatar
Carl Schaffer committed
203
    df = column.str.split(",", expand=True)
Carl Schaffer's avatar
changes    
Carl Schaffer committed
204
205
206
207
208
209
    for c in df.columns:
        df[c] = clean_col(df[c].astype(str))
    return df


def extract_kw_value_from_command(df, kw):
Carl Schaffer's avatar
Carl Schaffer committed
210
211
    pat = r"{}\s*=\s*([\s\d,\.\[\]+-]+)[,$]".format(kw)
    padded = df["command"] + ","
Carl Schaffer's avatar
changes    
Carl Schaffer committed
212
213
214
215
216
217
218
    extracted = padded.str.extract(pat)[0]
    if kw in df.columns:
        extracted = extracted.astype(df[kw].dtype)
        extracted = extracted.combine_first(df[kw])
    return extracted


219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def get_best_ffs(runname, l0kws):
    """Get the two best ffs for a given gris run, only flatfields

    Procedure:
    1: Get all flats from same day
    2: Calculate the time difference to the measurement for each flat
    3: Split into 'before' and 'after' sets
    4: Try to successively find the closest flat in each of these categories:
       a: parameters match and delta_t < 2
       b: parameters don't match and delta_t < 2
       c: parameters match and same day
       d: parameters don't match and same day

    Args:
      runname : run identifier formatted as "28may19.001"
      l0kws   : pandas DataFrame containing FITS headers for level0 files

    Returns:
      results : list of flatfields either empty one or two elements
    """
    print(runname)
Carl Schaffer's avatar
Carl Schaffer committed
240
    verbname, runnumber = re.search(r"(\d+\w+\d+)\.(\d\d\d)", runname).groups()
241
242
243
244
    date = pd.to_datetime(verbname).date()
    runnumber = int(runnumber)

    # get all entries matching the day
Carl Schaffer's avatar
Carl Schaffer committed
245
246
247
    day = l0kws[str(date)][
        ["date", "run", "ACCUMULA", "EXPTIME", "MEASURE", "FILENAME"]
    ].copy()
248
249
250
251
252
253

    run = day[day.run == runnumber]
    if run.empty:
        return []

    # select all flatfield measurements
Carl Schaffer's avatar
Carl Schaffer committed
254
    candidates = day[day.MEASURE == "flat field"]
255
256
257
258
    if candidates.empty:
        return []

    # calculate time_differences in hours
Carl Schaffer's avatar
Carl Schaffer committed
259
260
261
262
263
264
265
    candidates["delta_t"] = candidates.FILENAME.apply(
        lambda x: get_time(x, l0kws)
    )
    candidates.delta_t = candidates.delta_t - get_time(
        f"{verbname}.{runnumber:03d}", l0kws
    )
    candidates.delta_t = candidates.delta_t.dt.total_seconds() / (60 * 60)
266
267

    # check if parameters match:
Carl Schaffer's avatar
Carl Schaffer committed
268
    pars_to_check = ["ACCUMULA", "EXPTIME"]
269
    try:
Carl Schaffer's avatar
Carl Schaffer committed
270
        candidates["matching_pars"] = (
Carl Schaffer's avatar
Carl Schaffer committed
271
                run[pars_to_check].values == candidates[pars_to_check]
Carl Schaffer's avatar
Carl Schaffer committed
272
        ).sum(axis=1)
273
274
    except:
        print(runname)
Carl Schaffer's avatar
Carl Schaffer committed
275
        return []
276
277
278
279
280
281
282
283
284
285

    # split into before and after:
    before = candidates[np.sign(candidates.delta_t) == -1.0]
    after = candidates[np.sign(candidates.delta_t) == 1.0]

    hits = []
    for flat_candidates in [before, after]:
        if flat_candidates.empty:
            continue

Carl Schaffer's avatar
Carl Schaffer committed
286
        lt_2 = np.abs(flat_candidates.delta_t) <= 2
Carl Schaffer's avatar
Carl Schaffer committed
287
        match_pars = flat_candidates.matching_pars == len(pars_to_check)
288

289
290
291
292
        combinations = [
            match_pars & lt_2,
            lt_2,
            match_pars,
Carl Schaffer's avatar
bugfix    
Carl Schaffer committed
293
            np.full((len(flat_candidates)), True, dtype=bool),
294
        ]
295
296
297
298

        for c in combinations:
            if c.sum() == 0:
                continue
299
            flat_candidates.delta_t = np.abs(flat_candidates.delta_t)
Carl Schaffer's avatar
Carl Schaffer committed
300
301
            matches = flat_candidates[c].sort_values("delta_t")
            hits.append(matches.iloc[0]["run"])
302
303
304
            break

    # make verbose runname as result
Carl Schaffer's avatar
Carl Schaffer committed
305
    res = [f"{verbname}.{int(h):03d}" for h in hits]
306
307
308
309
310
    return res


def get_time(runname, l0kws):
    try:
Carl Schaffer's avatar
Carl Schaffer committed
311
        verbname, runnumber = runname.split(".")
312
313
314
    except:
        return np.nan
    date = pd.to_datetime(verbname).date()
Carl Schaffer's avatar
Carl Schaffer committed
315
316
317
318
    runnumber = int(runnumber.split("-")[0])
    day = l0kws[str(date)][
        ["date", "run", "ACCUMULA", "EXPTIME", "MEASURE", "DATE-OBS", "UTIME"]
    ]
319
320
321
322
323
    run = day[day.run == runnumber]

    if run.empty:
        return np.nan
    else:
Carl Schaffer's avatar
Carl Schaffer committed
324
        time = run.iloc[0]["UTIME"]
325
326
327
        time = pd.to_datetime(time).time()
        dt = datetime.datetime.combine(date, time)
        return dt
Carl Schaffer's avatar
changes    
Carl Schaffer committed
328
329


330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
def append_command(command, col):
    """append a command to a column, chacks if each field contains a
    value, and either appends separated by a comma or sets the value
    if none is set

    pars:
      command: string to be appended
      col: pd.Series containing the values

    returns:
      col: modified column
    """
    col = col.copy()
    for i, v in col.items():
        if v == v:  # check for nan
            v = ",".join([str(v), command])
        else:
            v = command
        col[i] = v
    return col


352
def pars_match(run_a, run_b, l0kws):
353
354
355
356
357
358
359
360
    """Check if the parameters for two given runs match

    pars:
      run_a, run_b: gris run identifiers DDMMMYY.run such as 24apr14.000
      l0kws: pandas DataFrame containing level0 keywords
    returns:
      result: boolean
    """
361
362
363
364
365
366
367
368
369
370
371
372
    pars = []
    for r in [run_a, run_b]:
        verbname, runnumber = r.split(".")
        date = str(pd.to_datetime(verbname).date())
        runnumber = int(runnumber)
        day = l0kws[date]
        run = day[day.run == runnumber]
        pars.append(run[["EXPTIME"]].values.tolist())
    result = pars[0] == pars[1]
    return result


Carl Schaffer's avatar
changes    
Carl Schaffer committed
373
374
375
376
377
##################################################
# Main block
##################################################


Carl Schaffer's avatar
Carl Schaffer committed
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
def get_l0_runlist(
        archive, force_update=False, storage_path="/dat/schaffer/data/"
):
    """ Get a list of all data files contained in the */level0/
    subfolders, log, cal and textfiles are ignored"""

    fn = "gris_l0_runs_" + archive.replace("/", "_") + ".pkl"
    fn = join(storage_path, fn)
    if exists(fn) and not force_update:
        print(fn)
        with open(fn, "rb") as cache_file:
            l0_runlist = pickle.load(cache_file)
        return l0_runlist
    else:
        runpattern = r"(\d{2}\w{3}\d{2}\.\d{3})"
        pattern = join(archive, "*/level0/*")
        files = glob(pattern)
        # filter files:
        files = list(map(basename, files))
        files = list(
            filter(lambda x: not x[-3:] in ["log", "txt", "cal"], files)
        )
        files = list(filter(lambda x: re.search(runpattern, x), files))

        l0_runlist = list(
            set([re.search(runpattern, r).group(0) for r in files])
        )
        with open(fn, "wb+") as cache_file:
            pickle.dump(l0_runlist, cache_file)
        return l0_runlist


Carl Schaffer's avatar
changes    
Carl Schaffer committed
410
411
if __name__ == "__main__":

Carl Schaffer's avatar
Carl Schaffer committed
412
    archive = "/dat/sdc/gris/"
413
    # get dataframe from calibration files
Carl Schaffer's avatar
changes    
Carl Schaffer committed
414
    result = get_calfile_df(archive=archive)
415

416
417
418
419
420
421
    df2 = result.sort_index()
    new = result.loc["2019-09-01"]
    new.boolean_keywords = new.boolean_keywords.fillna("") + "/lazy/quiet"
    #    with open("/dat/schaffer/projects/grisred/gris_calibration_settings.csv", "a+") as out:
    #        new.to_csv(out, header=False)

Carl Schaffer's avatar
changes    
Carl Schaffer committed
422
    # check which maps are present:
Carl Schaffer's avatar
Carl Schaffer committed
423
    by_run = groupby_function(
Carl Schaffer's avatar
Carl Schaffer committed
424
425
426
        sorted(get_l0_runlist("/dat/sdc/gris/", force_update=True)),
        gris_run_number,
    )
Carl Schaffer's avatar
Carl Schaffer committed
427
    runfiles = [a[0] for _, a in by_run.items()]
428
429

    # remove invalid or obsolete keywords from boolean_keywords column
430
    for invalid_option in ["/filt", "/xtalk", "/show"]:
431
432
433
434
435
436
        result.boolean_keywords = result.boolean_keywords.str.replace(
            f"{invalid_option},", ""
        )
        result.boolean_keywords = result.boolean_keywords.str.replace(
            f",{invalid_option}", ""
        )
437
438
439
440
441
442
443
444
445
446
        result.boolean_keywords = result.boolean_keywords.replace(
            f"{invalid_option}", np.nan
        )

    # ensure that all maps use the correct calibration routine
    result["main_routine"] = result.main_routine.replace(
        "gris_sp_v5", "gris_sp"
    )
    result["main_routine"] = result.main_routine.replace("gris", "gris_v6")
    result["main_routine"] = result.main_routine.replace("gris_v5", "gris_v6")
447
448
449
450
451
452

    # replace empty strings with nan
    result["boolean_keywords"] = result[["boolean_keywords"]].replace(
        "", np.nan
    )

453
454
455
456
457
    # add lazy and quiet keywords
    result["boolean_keywords"] = append_command(
        "/lazy,/quiet", result["boolean_keywords"]
    )

Carl Schaffer's avatar
Carl Schaffer committed
458
    l0kws = get_fits_header_df(runfiles, "/dat/schaffer/data/l0_keywords.pkl")
Carl Schaffer's avatar
changes    
Carl Schaffer committed
459
460

    l0kws = l0kws.reset_index()
Carl Schaffer's avatar
Carl Schaffer committed
461
462
    l0kws["datetime"] = pd.to_datetime(
        l0kws.date.astype(str).str.cat(l0kws.UT, sep=" ")
Carl Schaffer's avatar
Carl Schaffer committed
463
    )
Carl Schaffer's avatar
Carl Schaffer committed
464
    l0kws = l0kws.set_index("datetime")
465

Carl Schaffer's avatar
Carl Schaffer committed
466
467
468
    flats_rec = pd.read_pickle("recommended_flatfields.pkl")
    ffstudy = result[["map", "ff1", "ff2"]]
    ffstudy[["ff1_rec", "ff2_rec"]] = flats_rec
469
470
    fs = ffstudy

Carl Schaffer's avatar
Carl Schaffer committed
471
    for c in ["map", "ff1", "ff2", "ff1_rec", "ff2_rec"]:
472
        print(c)
Carl Schaffer's avatar
Carl Schaffer committed
473
        ffstudy[c + "_time"] = ffstudy[c].apply(lambda x: get_time(x, l0kws))
474

Carl Schaffer's avatar
Carl Schaffer committed
475
476
    for c in ["ff1_time", "ff2_time", "ff1_rec_time", "ff2_rec_time"]:
        ffstudy[c + "diff"] = (
Carl Schaffer's avatar
Carl Schaffer committed
477
478
                                      ffstudy["map_time"] - ffstudy[c]
                              ).dt.total_seconds() / (60 * 60)
479
480
    no_f2 = ffstudy[ffstudy.ff2.isnull()]
    matches = (no_f2.ff1 == no_f2.ff1_rec) | (no_f2.ff1 == no_f2.ff2_rec)
Carl Schaffer's avatar
Carl Schaffer committed
481
482
    spec = l0kws[l0kws["STATES"] == 1][["date", "run", "FILENAME"]]
    spec_runs = spec["FILENAME"].str.extract(r"(\d\d\w\w\w\d\d.\d\d\d)")
483
    result[result.map.isin(spec_runs.values.flatten())].command
484
485
486
487
488
489
490
491

    # check calibration file parameter matching
    for i, row in result.iterrows():
        if "gris_sp" in row["main_routine"]:
            res = True
        else:
            res = pars_match(row["map"], row["cal1"], l0kws)
        result.loc[i, "cal_match"] = res