#!/usr/bin/python3.13
"""
Script to set up and launch a cron job for the automated running of the known pulsar analysis.

The script requires an initial configuration file. Each time the script is re-run by cron it
automatically updates the times in the configuration file.

(C) Matthew Pitkin 2015
"""

# make print statements python 3-proof
from __future__ import print_function, division

import sys
import os
import ast
import calendar
import numpy as np
import subprocess as sp
import socket
import smtplib
import stat

import argparse
from configparser import RawConfigParser

from lalpulsar import git_version

# try importing astropy
try:
    from astropy.time import Time
except ImportError:
    print(
        "Could not import astropy: make sure astropy is installed (e.g. 'pip install astropy') and in the PYTHONPATH",
        file=sys.stderr,
    )
    sys.exit(1)


# try import python-crontab
try:
    from crontab import CronTab
except ImportError:
    print(
        "Could not import python-crontab: make sure it is installed (e.g. 'pip install python-crontab') and in the PYTHONPATH",
        file=sys.stderr,
    )
    sys.exit(1)


__author__ = "Matthew Pitkin <matthew.pitkin@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date


# number of seconds for each allowed time step value
# (the "monthly" entry is the length of the calendar month returned by Time.now(), and is
# evaluated once, when this module is loaded)
_now = Time.now().datetime  # single snapshot so that year and month always belong together
tsdic = {
    "hourly": 3600.0,  # 60 * 60 seconds (previously 1440, i.e. the number of minutes in a day)
    "daily": 86400.0,
    "weekly": 7.0 * 86400.0,
    "monthly": 86400.0 * calendar.monthrange(_now.year, _now.month)[1],
}


# function to remove crontab job
def remove_cron(cronid):
    """
    Remove all jobs whose comment equals `cronid` from the crontab opened with
    ``CronTab(user=True)`` and write the crontab back.

    This is a best-effort clean-up used on error paths: any failure is reported
    as a warning on stdout and is never propagated to the caller.

    Parameters
    ----------
    cronid: str
        The comment string that was attached to the cron job when it was created.

    Returns
    -------
    None
    """
    try:
        cron = CronTab(user=True)
        cron.remove_all(comment=cronid)
        cron.write()
    except Exception:
        # catch Exception rather than using a bare "except" so that SystemExit and
        # KeyboardInterrupt are not silently swallowed
        print("Warning... could not remove crontab job with comment ID '%s'" % cronid)

    return


# function to send an email
def send_email(FROM, TO, SUBJECT, MESSAGE, server, quitserver=True):
    """
    Send a plain-text notification email through an already-open mail server object.

    Sending is best-effort: a failure to send (or to close the connection) only
    produces a warning on stdout, so that callers on an error path can still go
    on to exit cleanly.

    Parameters
    ----------
    FROM: str
        The sender address.
    TO: str
        The recipient address.
    SUBJECT: str
        The subject line.
    MESSAGE: str
        The body of the email.
    server:
        An object providing ``sendmail(from, to, message)`` and ``quit()``
        (the script passes an ``smtplib.SMTP`` instance).
    quitserver: bool
        If True (the default) call ``server.quit()`` after attempting to send.

    Returns
    -------
    None
    """
    # set a notification email template
    emailtemplate = "From: {0}\nTo: {1}\nSubject: {2}\n\n{3}"
    message = emailtemplate.format(FROM, TO, SUBJECT, MESSAGE)

    try:
        server.sendmail(FROM, TO, message)
    except Exception:
        print("Warning... unable to send email.")

    if quitserver:
        # the connection may already be broken if sending failed, so closing it
        # must not be allowed to raise either
        try:
            server.quit()
        except Exception:
            print("Warning... unable to close connection to email server.")


# report a fatal error, tidy up and stop the script
def _abort(errmsg, cronid=None, email=None, sender=None, server=None):
    """
    Print `errmsg` to stderr, optionally remove a cron job and send a
    notification email, then exit the script with status 1.

    Parameters
    ----------
    errmsg: str
        The error message to print (and to email).
    cronid: str
        Comment ID of a crontab job to remove with remove_cron(), or None to
        leave the crontab untouched.
    email: str
        Address to notify with send_email(), or None to send nothing.
    sender, server:
        The sender address and mail server object handed on to send_email().

    Raises
    ------
    SystemExit
        Always (exit status 1).
    """
    print(errmsg, file=sys.stderr)
    if cronid is not None:
        remove_cron(cronid)
    if email is not None:
        send_email(sender, email, sys.argv[0] + ": Error message", errmsg, server)
    sys.exit(1)


# push a "weekly" or "monthly" cron job back by one day
def _postpone_cron_job(cronid, timestep, now):
    """
    Re-schedule the crontab job with comment `cronid` for the day after `now`.

    For a "weekly" `timestep` the job's day of the week is changed, otherwise
    its day of the month is changed. Any exception (including no job being
    found) is left for the caller to handle.
    """
    cron = CronTab(user=True)
    thisjob = None
    for job in cron.find_comment(cronid):
        thisjob = job  # if several jobs match, the last one is used
    if thisjob is None:
        raise RuntimeError("no crontab job with comment ID '%s'" % cronid)

    # get a time difference of one day
    oneday = Time("2010-01-02 00:00:00") - Time("2010-01-01 00:00:00")
    newcrontime = now + oneday
    if timestep == "weekly":
        thisjob.dow.on(newcrontime.datetime.strftime("%a").upper())  # new day of the week
    else:
        thisjob.day.on(newcrontime.datetime.day)  # new day of the month
    cron.write()


# main function
if __name__ == "__main__":
    description = """This script will setup and run a cron job for an automated known pulsar pipeline.
A configuration .ini file is required.
"""

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("inifile", help="The configuration (.ini) file")

    # parse input options
    opts = parser.parse_args()
    inifile = opts.inifile

    # check that inifile contains full absolute path (it is written into the cron wrapper script)
    if not os.path.isabs(inifile):
        _abort("Error... must supply the full absolute path of the configuration file.")

    # open and parse config file
    cp = RawConfigParser()
    try:
        cp.read(inifile)
    except Exception:
        _abort("Error... cannot parse configuration file '%s'" % inifile)

    # if the configuration file has no previous_endtimes option this is the first run, and the
    # crontab job still has to be created
    startcron = not cp.has_option("times", "previous_endtimes")

    # get the cron ID *before* anything that may need to remove the cron job, so that the
    # correct job is removed when a custom ID has been configured
    cronid = "knopeJob"  # default ID for the crontab job
    if cp.has_option("configuration", "cronid"):
        cronid = cp.get("configuration", "cronid")

    # ID of a cron job that is already installed and must be removed on a fatal error
    # (None on the first run, when no job has been created yet)
    oldjob = None if startcron else cronid

    # get the main configuration ini template for the run
    if not cp.has_option("configuration", "file"):
        _abort("Error... must specify a run configuration '.ini' file", oldjob)
    runconfig = cp.get("configuration", "file")
    if not os.path.isfile(runconfig):
        _abort(
            "Error... run configuration file '%s' does not exist!" % runconfig, oldjob
        )

    # check for kerberos keytab and certificate
    keytab = None
    if cp.has_option("kerberos", "keytab"):
        keytab = cp.get("kerberos", "keytab")

        if not cp.has_option("kerberos", "certificate"):
            raise RuntimeError("Problem with kerberos certificate")
        certificate = cp.get("kerberos", "certificate")

        if not cp.has_option("kerberos", "auth_princ"):
            raise RuntimeError("No kerberos authentication principle")
        authprinc = cp.get("kerberos", "auth_princ")

    # open and parse the run configuration file
    cprun = RawConfigParser()
    try:
        cprun.read(runconfig)
    except Exception:
        _abort("Error... could not read run configuration '.ini' file")

    # get the time increment for re-running the pipeline on (can be "hourly", "daily", "weekly" or "monthly")
    if not cp.has_option("times", "steps"):
        _abort("Error... must specify a time step 'steps' value in '[times]'")
    timestep = cp.get("times", "steps")
    if timestep not in ("hourly", "daily", "weekly", "monthly"):
        _abort(
            "Error... 'steps' value in '[times]' must be 'hourly', 'daily', 'weekly' or 'monthly'"
        )

    # try getting email address information (for notifications of e.g. job failures, re-running rescue DAGs, etc)
    email = None
    server = None
    if cp.has_option("notification", "email"):
        try:
            email = cp.get("notification", "email")
        except Exception:
            print(
                "Warning... could not get email address from configuration file. No notifications will be sent."
            )
            email = None

        if email is not None and "@" not in email:
            print(
                "Warning... '%s' is not a valid email address. No notifications will be sent."
                % email
            )
            email = None

    # set email server
    if email is not None:
        try:
            server = smtplib.SMTP("localhost")
        except Exception:
            print(
                "Warning... could not get SMTP server. No notication emails will be sent."
            )
            email = None

    # set email sender (fall back to a fixed address if user/host cannot be found)
    FROM = None
    if email is not None:
        try:
            FROM = os.environ["USER"] + "@" + socket.getfqdn()
        except Exception:
            FROM = "matthew.pitkin@ligo.org"

    # Get the current time
    now = Time.now()
    gpsnow = int(now.replicate("gps").value)

    # get run directory where DAG will have been created (if 'run_dir' was not set then the
    # current working directory will have been used)
    try:
        rundir = cprun.get("analysis", "run_dir")
    except Exception:
        rundir = os.getcwd()

    # check for DAG completion in previous analyses
    prevdags = None
    rescuedags = None
    if not startcron:
        try:
            prevdags = ast.literal_eval(cp.get("configuration", "previous_dags"))
            if not isinstance(prevdags, list) or not prevdags:
                raise ValueError("'previous_dags' must be a non-empty list")
        except Exception:
            _abort(
                "Error... no previous DAG file(s) have been set.",
                cronid,
                email,
                FROM,
                server,
            )
        lastdag = prevdags[-1]

        # check if the last dag has completed yet
        if os.path.isfile(lastdag + ".lock"):
            # DAG has not yet completed, so wait before trying again
            if timestep in ("hourly", "daily"):
                # if hourly or daily just wait until the next run
                print("Previous DAG not finished. Re-running later")
            else:
                # add a day to the crontab job and re-run then
                try:
                    _postpone_cron_job(cronid, timestep, now)
                except Exception:
                    _abort(
                        "Error... could not reset the crontab to wait for DAG completion.",
                        cronid,
                        email,
                        FROM,
                        server,
                    )
            # sys.exit (rather than os._exit) so that buffered output is flushed; it is safe
            # here because it is no longer inside a catch-all "except"
            sys.exit(0)

        # get any previous rescue DAGs - held in a dictionary and keyed to the original DAG name
        try:
            rescuedags = ast.literal_eval(cp.get("configuration", "rescue_dags"))
        except Exception:
            rescuedags = {}
        if not isinstance(rescuedags, dict):
            rescuedags = {}
        # previous number of rescue DAGs for the latest DAG (counts for other DAGs are kept)
        Nrescues = rescuedags.get(lastdag, 0)

        # check if there is a rescue DAG, and if so run it, and wait
        rescuefile = lastdag + ".rescue"
        if os.path.basename(rescuefile + "%03d" % (Nrescues + 1)) in os.listdir(
            os.path.dirname(lastdag)
        ):
            # if 2 rescue DAGs have already been run then just abort as there's probably some problem
            if Nrescues >= 2:
                _abort(
                    "Error... rescue DAG has been run twice and there are still failures. Automation code is aborting. Fix the problem and then retry",
                    cronid,
                    email,
                    FROM,
                    server,
                )

            # update number of previous rescues
            rescuedags[lastdag] = Nrescues + 1

            # run rescue DAG
            x = sp.Popen(["condor_submit_dag", lastdag])
            x.wait()
            if x.returncode != 0:
                _abort(
                    "Error... unable to submit rescue DAG for '%s'. Automation code is aborting."
                    % lastdag,
                    cronid,
                    email,
                    FROM,
                    server,
                )

            # add number of rescue DAGs to configuration file
            cp.set("configuration", "rescue_dags", str(rescuedags))

            # Write out updated configuration file
            with open(inifile, "w") as fc:
                cp.write(fc)

            # wait until re-running
            if timestep in ("hourly", "daily"):
                # if hourly or daily just wait until the next run
                print("Running rescue DAG")
            else:
                # add a day to the crontab job and re-run then
                try:
                    _postpone_cron_job(cronid, timestep, now)
                except Exception:
                    _abort(
                        "Error... could not reset the crontab to wait for rescue DAG completion.",
                        cronid,
                        email,
                        FROM,
                        server,
                    )
            sys.exit(0)

    # Get the start time of the automated analysis (it is required, and must be in the past)
    if not cp.has_option("times", "starttime"):
        _abort(
            "Error... a 'starttime' value is required in '[times]'.",
            oldjob,
            email,
            FROM,
            server,
        )
    try:
        starttime = cp.getint("times", "starttime")
    except Exception:
        _abort(
            "Error... could not parse 'starttime' in '[times]'. A start time is required.",
            oldjob,
            email,
            FROM,
            server,
        )

    # check start time is in the past
    if starttime >= gpsnow:
        _abort(
            "Error... start time (%f) must be in the past!" % starttime,
            oldjob,
            email,
            FROM,
            server,
        )

    # Get the end time of the automated analysis - if not present default to infinity (never stop!)
    endtime = np.inf
    if cp.has_option("times", "endtime"):
        try:
            endtime = cp.getint("times", "endtime")
        except Exception:
            print(
                "Warning... could not parse 'endtime' in '[times]'. Defaulting to Infinity."
            )
            endtime = np.inf

    # check end time is after start time
    if endtime <= starttime:
        _abort("Error... start time is after end time!", oldjob, email, FROM, server)

    # get a lag time (this will add a lag to gpsnow - if there is a lag between data creation and
    # replication on the various sites a lag may be required, so that the data exists)
    timelag = 0  # default to no lag
    if cp.has_option("times", "lag"):
        try:
            timelag = cp.getint("times", "lag")
        except Exception:
            timelag = 0

    # work out the start time of this analysis step
    if not startcron:
        try:
            prev_ends = ast.literal_eval(cp.get("times", "previous_endtimes"))
            if not isinstance(prev_ends, list) or not prev_ends:
                raise ValueError("'previous_endtimes' must be a non-empty list")
        except Exception:
            _abort(
                "Error... cannot parse previous end times list",
                cronid,
                email,
                FROM,
                server,
            )

        # the new start time is the previous end time
        newstart = prev_ends[-1]

        # if new start time is after end time stop the cronjob and exit
        if newstart >= endtime:
            remove_cron(cronid)
            sys.exit(0)  # end the script
    else:
        # first run: start from the configured start time with an empty history
        prev_ends = []
        newstart = starttime

    newend = newstart + int(tsdic[timestep])

    if newend >= endtime:
        # do not run past the overall end time
        newend = endtime
    elif newend < gpsnow - timelag:
        # the step ends in the past, so extend it up to the current (lagged) time
        newend = gpsnow - timelag

    prev_ends.append(newend)
    cp.set(
        "times",
        "previous_endtimes",
        "[" + ", ".join(str(z) for z in prev_ends) + "]",
    )  # output as list

    # Get the script for running the full pipeline
    if not cp.has_option("configuration", "exec"):
        _abort(
            "Error... a run script executable 'exec' is required in the '[configuration]' section.",
            oldjob,
            email,
            FROM,
            server,
        )
    runscript = cp.get("configuration", "exec")
    if not (os.path.isfile(runscript) and os.access(runscript, os.X_OK)):
        _abort(
            "Error... run script '%s' does not exist or is not executable" % runscript,
            oldjob,
            email,
            FROM,
            server,
        )

    # edit start and end times for the main run configuration script
    if not cprun.has_section("analysis"):
        _abort(
            "Error... run configuration file '%s' has no '[analysis]' section!"
            % runconfig,
            oldjob,
            email,
            FROM,
            server,
        )

    cprun.set("analysis", "starttime", str(newstart))  # set start time
    cprun.set("analysis", "endtime", str(newend))  # set end time
    cprun.set("analysis", "autonomous", "True")  # set 'autonomous' to true
    cprun.set("analysis", "submit_dag", "True")  # make sure Condor DAG is submitted

    # create file name for DAG and add it to the run configuration
    dagname = "automated_run_%s-%s" % (str(newstart), str(newend))
    cprun.set("analysis", "dag_name", dagname)

    # record the DAG file in the automation configuration (used to check for DAG completion)
    if prevdags is None:
        prevdags = []
    prevdags.append(os.path.join(rundir, dagname + ".dag"))
    cp.set(
        "configuration",
        "previous_dags",
        "[" + ", ".join(['"%s"' % z for z in prevdags]) + "]",
    )  # output as list

    # add the initial start time
    cprun.set("analysis", "autonomous_initial_start", str(starttime))

    # write updated parameters to the file
    with open(runconfig, "w") as fc:
        cprun.write(fc)

    # create crontab job
    if startcron:
        # check for a virtual environment to run code under
        wov = ""
        if cp.has_option("configuration", "virtualenv"):
            # assumes using virtualenvwrapper.sh
            virtualenv = cp.get("configuration", "virtualenv")
            woh = os.environ.get("WORKON_HOME")
            if woh is None:
                _abort(
                    "Error... if specifying a virtualenv the 'WORKON_HOME' environment must exist"
                )
            if not os.path.isdir(os.path.join(woh, virtualenv)):
                _abort("Error... if specifying a virtualenv the environment must exist")
            wov = "workon " + virtualenv
        elif cp.has_option("configuration", "conda"):  # assumes using conda
            virtualenv = cp.get("configuration", "conda")
            wov = "conda activate {}".format(virtualenv)

        # check for .bash_profile, or similar file, to invoke
        if cp.has_option("configuration", "profile"):
            profile = cp.get("configuration", "profile")
        else:
            # default to ${HOME}/.bash_profile
            profile = os.path.join(os.environ["HOME"], ".bash_profile")
        if not os.path.isfile(profile):
            _abort("Error... no profile file is given")

        if keytab is not None:
            krbcert = "export KRB5CCNAME={}".format(certificate)
            kinit = "/usr/bin/kinit -a -P -F -k -t {} {}".format(keytab, authprinc)
            ligoproxyinit = "/usr/bin/ligo-proxy-init -k"
        else:
            krbcert = ""
            kinit = ""
            ligoproxyinit = ""

        # set the cron wrapper script (which will re-run this script); the name is set outside
        # the "try" so that it is always defined for the error message
        cronwrapperscript = os.path.splitext(inifile)[0] + ".sh"
        cronwrapper = (
            """#!/bin/bash
source {0} # source profile
{1}        # enable virtual environment (assumes you have virtualenvwrapper.sh/conda)
{2}        # export kerberos certificate location (if required)
{3}        # generate kerberos certificate (if required)
{4}        # create proxy (if required)
%s {5}     # re-run this script
"""
            % sys.argv[0]
        )

        # output wrapper script
        try:
            with open(cronwrapperscript, "w") as fp:
                fp.write(
                    cronwrapper.format(
                        profile, wov, krbcert, kinit, ligoproxyinit, inifile
                    )
                )
            os.chmod(
                cronwrapperscript, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH
            )  # make executable
        except Exception:
            _abort(
                "Error... could not output cron wrapper script '%s'."
                % cronwrapperscript
            )

        try:
            cron = CronTab(user=True)
            job = cron.new(command=cronwrapperscript, comment=cronid)

            # set job time - this will start at the next time step (as we've just run the first step)
            when = now.datetime
            job.minute.on(when.minute)  # required for every time step
            if timestep != "hourly":
                job.hour.on(when.hour)  # required for 'daily', 'weekly' and 'monthly'
            if timestep == "weekly":
                job.dow.on(when.strftime("%a").upper())  # day of the week
            elif timestep == "monthly":
                job.day.on(when.day)  # day of the month

            cron.write()
        except Exception:
            _abort("Error... could not create crontab job", None, email, FROM, server)

    ### RUN ANALYSIS SCRIPT ###
    # NOTE(review): the command is passed to the shell as a single string, so paths containing
    # whitespace or shell metacharacters will not work. stdout/stderr are not piped, so 'out'
    # and 'err' are None and the script's output goes straight to this process's streams.
    p = sp.Popen("{0} {1}".format(runscript, runconfig), shell=True)
    out, err = p.communicate()
    if p.returncode != 0:
        # by this point a cron job exists on every path (it was either already installed or has
        # just been created above), so always remove it on failure
        _abort(
            "Error... problem running main script '%s'.: %s, %s" % (runscript, out, err),
            cronid,
            email,
            FROM,
            server,
        )
    ###########################

    # Write out updated configuration file
    with open(inifile, "w") as fc:
        cp.write(fc)

    sys.exit(0)
