#!/bin/sh
# Treat any reference to an unset variable as an error. Every global used by
# the functions below is therefore initialized here, even when empty.
set -u

# Name printed in usage and error messages.
script_name=vine_submit_workers
# Directory that receives the worker executable and the submit files.
# Set by --scratch-dir; otherwise a default is picked in set_up_working_directory.
submit_dir=
# Becomes 1 once a manager name was given (-M/-N); then host and port are optional.
use_manager_name=0
# Password file given with -P/--password (empty when none).
pwfile=""
# Options accumulated for the vine_worker command line.
arguments=""

# Poncho environment tarball given with --poncho-env (empty when none).
poncho_env=""

# Manager address and number of workers, filled in by set_up_manager_address.
host=
port=
count=1

# Per-worker resources given with --cores/--memory/--disk (empty when not given).
cores=
memory=
disk=

# Becomes 1 when any of --cores/--memory/--disk was given; read by uge_setup.
specified_resources=0

# HTCondor-specific settings, filled in by condor_parse_arguments.
condor_autosize=
condor_spool=
condor_docker_universe=
condor_class_ads=
condor_requirements=
condor_parameters=
# Comma-separated list written to transfer_input_files in the submit file.
condor_transfer_input_files="vine_worker, cctools_gpu_autodetect"

# UGE-specific settings, filled in by uge_parse_arguments.
uge_use_jobarray=0
uge_parameters=""

# SLURM-specific settings, filled in by slurm_parse_arguments and slurm_setup.
slurm_parameters=""

# Batch system chosen with -T/--batch-type, and the name of the function that
# parses its options. Until -T is seen, the parser is a stub that prints an
# error and exits.
batch_option=
parse_arguments_batch=unset_parse_arguments

# Becomes 1 with --dry-run: the submit command is echoed instead of executed.
dry_run=0

condor_show_help()
{
    # Print the HTCondor section of the usage text, followed by a blank line.
    printf '%s\n' \
        "Condor:" \
        "  -r,--requirements <reqs>  Condor requirements expression." \
        "  --class-ad <ad>           Extra condor class ad. May be specified multiple times." \
        "  --autosize                Condor will automatically size the worker to the slot." \
        "  --spool                   Spool required input files." \
        "  --docker-universe <image> Run worker inside <image> using condor's docker universe." \
        "  -p <parameters>           HTCondor condor_submit parameters." \
        ""
}

slurm_show_help()
{
    # Print the SLURM section of the usage text, followed by a blank line.
    printf '%s\n' \
        "SLURM:" \
        "  -p <parameters>          SLURM sbatch parameters." \
        ""
}

uge_show_help()
{
    # Print the UGE section of the usage text, followed by a blank line.
    printf '%s\n' \
        "UGE:" \
        "  -j                       Use job array to submit workers." \
        "  -p <parameters>          UGE qsub parameters." \
        ""
}

show_help()
{
    # Print the complete usage text on stdout and terminate the script.
    #
    # Arguments: $1 - exit status to terminate with.
    # Globals:   script_name (read).
    # The batch-specific sections are produced by condor_show_help,
    # slurm_show_help and uge_show_help.
    exit_status=$1

    echo "Use: $script_name [batch selection] [worker options] [batch specific options] <servername> <port> <num-workers>"
    echo "         or"
    echo "     $script_name [batch selection] [worker options] --manager-name <name> [batch specific options] <num-workers>"

    echo " "
    echo "batch selection:"
    echo "  -T, --batch-type <batch>  Name of the batch system to submit workers. Out of (condor, slurm, uge)."
    echo ""

    echo "worker options:"

    echo "  -M,--manager-name <name>  Name of the preferred manager for worker."
    echo "  -C,--catalog <catalog>    Set catalog server to <catalog>. <catalog> format: HOSTNAME:PORT."
    echo "  -t,--timeout <time>       Abort after this amount of idle time. (default=900s)."
    echo "  -d,--debug <subsystem>    Enable debugging on worker for this subsystem (try -d all to start)."
    echo "  -w,--tcp-window-size <size>  Set TCP window size."
    echo "  -i,--min-backoff <time>   Set initial value for backoff interval when worker fails to connect to a manager. (default=1s)"
    echo "  -b,--max-backoff <time>   Set maximum value for backoff interval when worker fails to connect to a manager. (default=60s)"
    echo "  -z,--disk-threshold <size>  Set available disk space threshold (in MB). When exceeded worker will clean up and reconnect. (default=100MB)"
    echo "  -A,--arch <arch>          Set architecture string for the worker to report to manager instead of the value in uname."
    echo "  -O,--os <os>              Set operating system string for the worker to report to manager instead of the value in uname."
    echo "  -s,--workdir <path>       Set the location for creating the working directory of the worker."
    echo "  -P,--password <pwfile>    Password file to authenticate workers to manager."
    echo "  --ssl                     Use ssl to communicate with manager."
    echo "  --cores <num>             Set the number of cores each worker should use (0=auto). (default=1)"
    echo "  --memory <size>           Manually set the amount of memory (in MB) reported by this worker."
    echo "  --disk <size>             Manually set the amount of disk (in MB) reported by this worker."
    echo "  --scratch-dir <path>      Create the USER-workers submit directory under <path>. (default=/tmp for condor, current directory otherwise)"
    echo "  -E,--worker-options <str> Extra options passed to vine_worker."
    echo "  --poncho-env <file.tar.gz>  Run each worker inside this poncho environment."
    echo "  --dry-run                 Print the submit script and command instead of submitting."
    echo "  -h,--help                 Show this help message."
    echo ""
    echo "batch specific options:"

    condor_show_help
    slurm_show_help
    uge_show_help

    exit "$exit_status"
}

unset_parse_arguments() {
    # Placeholder batch parser used until -T selects a batch system:
    # report the ordering problem on stderr and abort with status 1.
    {
        echo "Batch system should be selected before any batch configuration option."
        echo "Choose one of: -Tcondor -Tuge -Tslurm"
    } 1>&2
    exit 1
}

# Condor specific
condor_setup()
{
    # Prepare the resource requests and worker arguments for HTCondor.
    #
    # Globals: cores, memory, disk, arguments (read and rewritten);
    #          condor_autosize, host (read).
    # Exits with status 1 when the manager host is "localhost".

    # Defaults for anything not given: 1 core, 512 MB memory, 1024 MB disk.
    : "${cores:=1}"
    : "${memory:=512}"
    : "${disk:=1024}"

    # From here on disk is expressed in kB, the unit condor wants.
    disk=$((disk*1024))

    # With --autosize each request becomes an ifThenElse expression over the
    # slot totals instead of a plain number.
    case $condor_autosize in
        1)
            cores="ifThenElse($cores > TotalSlotCpus, $cores, TotalSlotCpus)"
            memory="ifThenElse($memory > TotalSlotMemory, $memory, TotalSlotMemory)"
            disk="ifThenElse($disk > TotalSlotDisk, $disk, TotalSlotDisk)"
        ;;
    esac

    # localhost is rejected for condor workers; the user must give an IP address.
    if [ "$host" = localhost ]; then
        echo "Using localhost with condor workers may lead to unintended consequences, please specify the IP address instead" 1>&2
        exit 1
    fi

    # Prefix the worker options with the size of the slot actually given
    # (sometimes larger than the minimum requested). Single quotes keep the
    # condor \$\$() substitutions literal.
    condor_slot_args='--cores $$([TARGET.Cpus]) --memory $$([TARGET.Memory]) --disk $$([TARGET.Disk/1024])'
    arguments="$condor_slot_args $arguments"
}

condor_parse_arguments()
{
    # Interpret one HTCondor-specific option found at the front of "$@".
    #
    # Arguments: $1 - candidate option, $2 - its value (when it takes one).
    # Returns:   number of arguments consumed: 0 when $1 is not a condor
    #            option, 1 for a flag, 2 for an option with a value.
    case $1 in
        --autosize)
            condor_autosize=1
            consumed=1
        ;;

        --spool)
            condor_spool="--spool"
            consumed=1
        ;;

        -r|--requirements)
            condor_requirements="$condor_requirements $2"
            consumed=2
        ;;

        --class-ad)
            # Ads are kept separated by a literal "\n"; it is expanded with
            # printf %b when the submit file is written.
            condor_class_ads="$condor_class_ads\n$2"
            consumed=2
        ;;

        --docker-universe)
            condor_docker_universe="$2"
            consumed=2
        ;;

        -p)
            condor_parameters="$condor_parameters $2"
            consumed=2
        ;;

        *)
            consumed=0
        ;;
    esac

    return "$consumed"
}

condor_set_up_password_file()
{
    # Append the password file to the comma-separated list of files that the
    # condor submit file names in transfer_input_files.
    condor_transfer_input_files="$condor_transfer_input_files, $pwfile"
}

condor_submit_workers_command()
{
    # Write ./condor_submit_file for the requested workers and pass it to
    # condor_submit. With dry_run=1 the file is printed and the condor_submit
    # command line is echoed instead of executed.
    #
    # Returns: status of the final condor_submit (or echo) command.
    condor_setup

    # Choose what condor executes: the worker itself, or poncho_package_run
    # wrapping the worker when a poncho environment was requested.
    if [ -z "$poncho_env" ]
    then
        executable="vine_worker"
        arguments="$arguments $host $port"
    else
        condor_transfer_input_files="${condor_transfer_input_files}, poncho_package_run, $poncho_env"

        executable="poncho_package_run"
        arguments="-e $poncho_env -- ./vine_worker $arguments $host $port"
    fi

    # Everything inside the braces goes into a freshly truncated submit file.
    {
        if [ -z "${condor_docker_universe}" ]
        then
            printf '%s\n' "universe = vanilla"
        else
            printf '%s\n' "universe = docker" "docker_image = ${condor_docker_universe}"
        fi

        # \$(CLUSTER) and \$(PROCESS) are escaped so they reach the file
        # literally; the printf %b line expands the "\n" separators stored
        # between the class ads.
        cat <<EOF
executable = $executable
arguments = $arguments
transfer_input_files = ${condor_transfer_input_files}
should_transfer_files = yes
when_to_transfer_output = on_exit
output = worker.\$(CLUSTER).\$(PROCESS).output
error = worker.\$(CLUSTER).\$(PROCESS).error
log = workers.log
+JobMaxSuspendTime = 0
$(printf "%b" "${condor_class_ads}" 2> /dev/null)

# Some programs assume some variables are set, like HOME, so we export the
# environment variables with the job.  Comment the next line if you do not want
# the environment exported.
getenv = true
EOF

        if [ -n "$condor_requirements" ]
        then
            echo "requirements = ${condor_requirements}"
        fi

        # Memory is requested in megabytes, disk in kilobytes.
        echo "request_cpus = ${cores}"
        echo "request_memory = ${memory}"
        echo "request_disk = ${disk}"

        echo "queue $count"
    } > condor_submit_file

    case ${dry_run} in
        1)
            condor_submit_cmd="echo condor_submit"
            echo "######## condor_submit_file ########"
            cat condor_submit_file
            echo "####################################"
        ;;
        *)
            condor_submit_cmd=condor_submit
        ;;
    esac

    # Left unquoted on purpose: each variable may hold zero or several words.
    ${condor_submit_cmd} ${condor_spool} ${condor_parameters} condor_submit_file
}

# SLURM specific
slurm_setup()
{
    # Translate the requested core count into sbatch options: no count (or 0)
    # asks for --exclusive, anything else for that many cpus per task.
    case $cores in
        "" | 0)
            slurm_parameters="$slurm_parameters --exclusive"
        ;;
        *)
            slurm_parameters="$slurm_parameters --cpus-per-task $cores"
        ;;
    esac
}

slurm_parse_arguments()
{
    # Interpret one SLURM-specific option found at the front of "$@".
    #
    # Returns: 2 when "-p <parameters>" was consumed, 0 when $1 is not a
    #          SLURM option.
    if [ "$1" = -p ]
    then
        slurm_parameters="$slurm_parameters $2"
        consumed=2
    else
        consumed=0
    fi

    return "$consumed"
}

slurm_submit_workers_command()
{
    # Submit $count workers to SLURM, one sbatch job per worker. The job
    # script is fed to sbatch on stdin. With dry_run=1 the script is printed
    # and the sbatch command lines are echoed instead of executed.
    #
    # Globals: poncho_env, submit_dir, arguments, host, port, count, dry_run,
    #          slurm_parameters (read).
    # Returns: 0 when every submission succeeded, otherwise the status of the
    #          most recent failed sbatch. Exits 1 when sbatch is not in PATH.
    slurm_setup

    if [ -n "$poncho_env" ]; then
        worker_command="${submit_dir}/poncho_package_run -e ${submit_dir}/$(basename "$poncho_env") -- ${submit_dir}/vine_worker $arguments $host $port"
    else
        worker_command="${submit_dir}/vine_worker $arguments $host $port"
    fi

    if [ "${dry_run}" = 0 ]
    then
        # command -v is specified by POSIX; 'which' is an optional external tool.
        if ! sbatch=$(command -v sbatch 2>/dev/null)
        then
            echo "$0: please add 'sbatch' to your PATH." 1>&2
            exit 1
        fi
    else
        sbatch="echo sbatch"
        echo "######## worker.sh ########"
        echo "#!/bin/sh"
        echo "${worker_command}"
        echo "####################################"
    fi

    # Remember a failed submission so that a later success cannot mask it.
    submit_status=0
    to_submit=$count
    while [ "$to_submit" -gt 0 ]
    do
        to_submit=$((to_submit-1))
        # ${sbatch} and $slurm_parameters are left unquoted on purpose: each
        # may hold several words.
        ${sbatch} --job-name vineWorker --ntasks=1 --nodes=1 $slurm_parameters <<EOF || submit_status=$?
#!/bin/sh
${worker_command}
EOF
    done

    return "$submit_status"
}

# UGE specific
uge_setup()
{
    # Default the core count to 1 when none (or 0) was given, and remind the
    # user on stderr that manually specified worker resources must also be
    # described to qsub.
    case $cores in
        "" | 0)
            cores=1
        ;;
    esac

    if [ "$specified_resources" = 1 ]
    then
        {
            echo "Worker resources were manually specified. Remember to also describe your"
            echo "resources according to your local qsub system e.g., -p '-pe smp 4'."
            echo "See also the --uge-parameter option in the configure script when manually compiling CCTools."
        } 1>&2
    fi

}

uge_parse_arguments()
{
    # Interpret one UGE-specific option found at the front of "$@".
    #
    # Returns: number of arguments consumed: 1 for -j, 2 for
    #          "-p <parameters>", 0 when $1 is not a UGE option.
    case $1 in
        -p)
            uge_parameters="$uge_parameters $2"
            consumed=2
        ;;
        -j)
            uge_use_jobarray=1
            consumed=1
        ;;
        *)
            consumed=0
        ;;
    esac

    return "$consumed"
}

uge_submit_workers_command()
{
    # Write ./worker.sh and submit it to UGE with qsub, either once as a job
    # array (uge_use_jobarray=1) or $count separate times. With dry_run=1 the
    # script is printed and the qsub command lines are echoed instead of
    # executed.
    #
    # Globals: poncho_env, submit_dir, arguments, host, port, count, dry_run,
    #          uge_use_jobarray, uge_parameters (read).
    # Returns: 0 when every submission succeeded, otherwise the status of the
    #          most recent failed qsub. Exits 1 when qsub is not in PATH.
    uge_setup

    if [ -n "$poncho_env" ]; then
        worker_command="${submit_dir}/poncho_package_run -e ${submit_dir}/$(basename "$poncho_env") -- ${submit_dir}/vine_worker $arguments $host $port"
    else
        worker_command="${submit_dir}/vine_worker $arguments $host $port"
    fi

    cat >worker.sh <<EOF
#!/bin/sh

${worker_command}
EOF

    chmod 755 worker.sh

    if [ "${dry_run}" = 0 ]
    then
        # command -v is specified by POSIX; 'which' is an optional external tool.
        if ! qsub=$(command -v qsub 2>/dev/null)
        then
            echo "$0: please add 'qsub' to your PATH." 1>&2
            exit 1
        fi
    else
        qsub="echo qsub"
        echo "######## worker.sh ########"
        cat worker.sh
        echo "####################################"
    fi

    # ${qsub} and $uge_parameters are left unquoted on purpose: each may hold
    # several words.
    if [ "$uge_use_jobarray" = 1 ]
    then
        ${qsub} -t 1-"$count":1 -cwd $uge_parameters worker.sh
        return $?
    fi

    # Remember a failed submission so that a later success cannot mask it.
    submit_status=0
    n=0
    while [ "$n" -lt "$count" ]
    do
        n=$((n+1))
        ${qsub} -cwd $uge_parameters worker.sh || submit_status=$?
    done

    return "$submit_status"
}


parse_arguments_general()
{
    # Interpret one general option (-T, --dry-run, -h) at the front of "$@".
    #
    # Returns: number of arguments consumed: 0 when $1 is not a general
    #          option, 1 for a flag, 2 for -T with its value.
    # Exits 1, with a message on stderr, when -T is repeated, lacks a value,
    # or names an unknown batch system. -h prints the help and exits 0.
    consumed=1
    case $1 in
        -T | --batch-type)
            if [ $# -lt 2 ]
            then
                echo "option $1 requires an argument. Choose from condor, uge and slurm" 1>&2
                exit 1
            fi
            if [ -n "${batch_option}" ]
            then
                echo "-T cannot be specified twice." 1>&2
                exit 1
            fi
            batch_option=$2
            # Route the remaining batch-specific options to the right parser.
            case $batch_option in
                condor)
                    parse_arguments_batch=condor_parse_arguments
                ;;
                uge)
                    parse_arguments_batch=uge_parse_arguments
                ;;
                slurm)
                    parse_arguments_batch=slurm_parse_arguments
                ;;
                *)
                    echo "$batch_option is not a valid batch. Choose from condor, uge and slurm" 1>&2
                    exit 1
                ;;
            esac
            consumed=2
        ;;
        --dry-run)
            dry_run=1
        ;;
        -h | --help)
            show_help 0
        ;;
        *)
            consumed=0
        ;;
    esac

    return "$consumed"
}

parse_arguments()
{
    # Walk the command line, handing each option to the general, worker and
    # batch-specific parsers in turn. Each parser returns how many arguments
    # it consumed (0 = not mine). Whatever remains after the options is
    # passed to set_up_manager_address.
    #
    # Unrecognized options print the help and exit 1 (via show_help).
    while [ $# -gt 0 ]
    do
        # normalize arguments
        org_opt=$1
        case $org_opt in
            -[A-Za-z]?*)
                # turn -Xarg into -X arg
                opt_value=${org_opt#-?}
                new_opt=${org_opt%"$opt_value"}
                shift
                set -- "${new_opt}" "${opt_value}" "$@"
                continue
            ;;
            --*=*)
                # turn --X=arg into --X arg, splitting at the FIRST '=' so
                # that values which themselves contain '=' stay intact
                new_opt=${org_opt%%=*}
                opt_value=${org_opt#*=}
                shift
                set -- "${new_opt}" "${opt_value}" "$@"
                continue
            ;;
        esac

        parse_arguments_general "$@"
        consumed=$?
        if [ "${consumed}" -gt 0 ]
        then
            shift "${consumed}"
            continue
        fi

        parse_arguments_worker "$@"
        consumed=$?
        if [ "${consumed}" -gt 0 ]
        then
            shift "${consumed}"
            continue
        fi

        # parse_arguments_batch holds the name of the parser selected by -T.
        ${parse_arguments_batch} "$@"
        consumed=$?
        if [ "${consumed}" -gt 0 ]
        then
            shift "${consumed}"
            continue
        fi

        case $1 in
            -*)
                echo "option $1 is not recognized." 1>&2
                show_help 1
            ;;
            *)
                # reached [localhost port] num_workers
                break
            ;;
        esac
    done

    set_up_manager_address "$@"
}

parse_arguments_worker()
{
    # Interpret one worker option at the front of "$@". Most options are
    # appended, in short form, to the global $arguments string that ends up
    # on the vine_worker command line.
    #
    # Arguments: $1 - candidate option, $2 - its value (when it takes one).
    # Returns:   number of arguments consumed: 0 when $1 is not a worker
    #            option, 1 for a flag, 2 for an option with a value.
    consumed=2  # most options consume two args
    case $1 in
        -a | --advertise)
            # Accepted and ignored, for backwards compatibility
            consumed=1
        ;;
        -M | --manager-name | --master-name | -N | --name)
            # -N/--name are kept for backwards compatibility
            arguments="$arguments -M $2"
            use_manager_name=1
        ;;
        -C | --catalog)
            arguments="$arguments -C $2"
        ;;
        -t | --timeout)
            arguments="$arguments -t $2"
        ;;
        -d | --debug)
            arguments="$arguments -d $2"
        ;;
        -w | --tcp-window-size)
            arguments="$arguments -w $2"
        ;;
        -i | --min-backoff)
            arguments="$arguments -i $2"
        ;;
        -b | --max-backoff)
            arguments="$arguments -b $2"
        ;;
        -z | --disk-threshold)
            arguments="$arguments -z $2"
        ;;
        -A | --arch)
            arguments="$arguments -A $2"
        ;;
        -O | --os)
            arguments="$arguments -O $2"
        ;;
        -s | --workdir)
            arguments="$arguments -s $2"
        ;;
        --scratch-dir)
            # Only affects where the submit files are created; not passed on.
            submit_dir="$2/${USER}-workers"
        ;;
        -P | --password)
            pwfile="$2"
            arguments="$arguments -P $pwfile"
        ;;
        --ssl)
            arguments="$arguments --ssl"
            consumed=1
        ;;
        --cores)
            # Resource options are passed on and also remembered for the
            # batch system request.
            arguments="$arguments --cores $2"
            cores="$2"
            specified_resources=1
        ;;
        --memory)
            arguments="$arguments --memory $2"
            memory="$2"
            specified_resources=1
        ;;
        --disk)
            arguments="$arguments --disk $2"
            disk="$2"
            specified_resources=1
        ;;
        -E | --worker-options)
            # Passed through verbatim.
            arguments="$arguments $2"
        ;;
        --poncho-env)
            poncho_env="$2"
        ;;
        -h | --help)
            show_help 0
        ;;
        *)
            consumed=0
        ;;
    esac

    return "$consumed"
}


set_up_manager_address()
{
    # Fill host, port and count from the arguments left after option parsing.
    #
    # Without a manager name exactly "<servername> <port> <num-workers>" is
    # required. With a manager name either "<num-workers>" alone or the full
    # triple is accepted. Anything else prints an error and exits 1.
    host=
    port=
    count=

    # Dispatch on "<use_manager_name>:<number of arguments>".
    case "${use_manager_name}:$#" in
        *:3)
            host=$1
            port=$2
            count=$3
        ;;
        0:*)
            echo "expected 3 arguments: <servername> <port> <num-workers>, but found $#: $*" 1>&2
            echo "To view the help message, type: $script_name -h" 1>&2
            exit 1
        ;;
        *:0)
            echo "<num-workers> is missing"  1>&2
            exit 1
        ;;
        *:1)
            count=$1
        ;;
        *)
            echo "expected 1 argument for <num-workers>, but found $#: $*" 1>&2
            echo "To view the help message, type: $script_name -h" 1>&2
            exit 1
        ;;
    esac
}

# Copy file $1 into ${submit_dir}. When the copy fails but an earlier copy of
# the file is already there, warn and carry on; otherwise abort, since the
# submitted jobs could not run without it.
stage_in_submit_dir()
{
    if cp "$1" "${submit_dir}"
    then
        return 0
    fi

    if [ -f "${submit_dir}/$(basename "$1")" ]
    then
        echo "$0: warning: could not refresh $1 in ${submit_dir}; using the copy already there." 1>&2
        return 0
    fi

    echo "$0: could not copy $1 to ${submit_dir}." 1>&2
    exit 1
}

set_up_working_directory()
{
    # Create the submit directory, copy the worker (plus the optional GPU
    # detection helper, poncho files and password file) into it, and make it
    # the current directory.
    #
    # Globals: submit_dir (read; defaulted when empty), batch_option,
    #          poncho_env (read).
    # Exits 1 when the directory cannot be created or entered, or when a
    # required executable is missing from PATH or cannot be staged.

    # Set up a local temporary directory to manage the log files.
    # home directories on shared filesystems are often not accessible
    if [ -z "${submit_dir}" ]
    then
        if [ "$batch_option" = condor ]
        then
            submit_dir=/tmp/"${USER}"-workers
        else
            submit_dir="${PWD}/${USER}"-workers
        fi
    fi

    echo "Creating worker submit scripts in ${submit_dir}..."
    if ! mkdir -p "${submit_dir}"
    then
        echo "$0: could not create directory ${submit_dir}." 1>&2
        exit 1
    fi

    # Copy the worker executable into the temporary directory,
    # for similar reasons.
    # command -v is specified by POSIX; 'which' is an optional external tool.
    if ! worker=$(command -v vine_worker 2>/dev/null)
    then
        echo "$0: please add 'vine_worker' to your PATH." 1>&2
        exit 1
    fi

    # GPU detection is optional: warn, but keep going without it.
    if gpu_detection=$(command -v cctools_gpu_autodetect 2>/dev/null)
    then
        cp "$gpu_detection" "${submit_dir}"
    else
        echo "$0: could not find cctools_gpu_autodetect in PATH. gpus will not be automatically detected." 1>&2
    fi

    if [ -n "$poncho_env" ]; then
        if ! poncho_bin=$(command -v poncho_package_run 2>/dev/null)
        then
            echo "$0: please add 'poncho_package_run' to your PATH." 1>&2
            exit 1
        fi

        stage_in_submit_dir "$poncho_bin"
        stage_in_submit_dir "$poncho_env"
    fi

    stage_in_submit_dir "${worker}"
    set_up_password_file

    cd "${submit_dir}" || exit 1
}

set_up_password_file()
{
    # When a password file was selected: verify that it exists, copy it into
    # the submission directory and, for condor, add it to the list of input
    # files to transfer. Does nothing when no password file was given.
    # Exits 1 when the file does not exist.
    [ -n "${pwfile}" ] || return 0

    if [ ! -f "$pwfile" ]
    then
        echo "$script_name password file $pwfile not found" 1>&2
        exit 1
    fi

    cp "$pwfile" "${submit_dir}"

    case $batch_option in
        condor)
            condor_set_up_password_file
        ;;
    esac
}

submit_workers_command()
{
    # Run the submit routine of the selected batch system, named
    # <batch>_submit_workers_command. An unsupported or missing selection
    # prints an error plus the help and exits 1 (via show_help).
    case $batch_option in
        condor | slurm | uge)
            "${batch_option}_submit_workers_command"
        ;;
        *)
            echo "Batch option \"$batch_option\" is not supported." 1>&2
            show_help 1
        ;;
    esac
}

submit_workers()
{
    # Top-level driver: parse the command line, prepare the submit directory,
    # submit the workers, then leave with the status of the submission.
    parse_arguments "$@"
    set_up_working_directory
    submit_workers_command

    # A bare exit reuses the status of the command just run.
    exit
}

# Script entry point: hand the command-line arguments, unchanged, to the
# driver. submit_workers does not return; it ends the script with exit.
submit_workers "$@"
