#!/usr/bin/python3

# Copyright (C) 2001 Matt Zimmerman <mdz@debian.org>
# Copyright (C) 2007-2020 Javier Fernandez-Sanguino <jfs@debian.org>
# - included patch from Justin Pryzby <justinpryzby_AT_users.sourceforge.net>
#   to work with the latest Lsof
# - modify to reduce false positives by not
#   complaining about deleted inodes/files under /tmp/, /var/log/,
#   /var/run or named   /SYSV.
# - introduced a verbose option
# - included code from 'psdel' contributed by Sam Morris <sam_AT_robots.org.uk> to
#   make the program work even if lsof is not installed
#   (available at https://robots.org.uk/src/psdel)
# Copyright (C) 2013-2020 Axel Beckert
# Copyright (C) 2022 Richard Lewis
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
#
# On Debian systems, a copy of the GNU General Public License may be
# found in /usr/share/common-licenses/GPL.

import sys
import os, errno
import re
import sys
import subprocess
import getopt
import shutil
import collections

def checkroot():
    """Warn on stderr when the effective caller is not root (uid 0).

    The program carries on either way; the warning only tells the user
    that the results will be incomplete.
    """
    if os.getuid() == 0:
        return
    warning = 'WARNING: This program should be run as root: information will be incomplete\n'
    sys.stderr.write(warning)

def init_is_systemd():
    """Return True when the path /run/systemd/system exists.

    The existence of that path is the only thing checked to decide
    whether systemd is being used.
    """
    systemd_runtime_dir = '/run/systemd/system'
    return os.path.exists(systemd_runtime_dir)

def cgroups2():
    """Return True when cgroups(7) v2 are in use.

    Without systemd the answer is always False.  Otherwise stat(1) is
    asked for the filesystem type of /sys/fs/cgroup, and the answer is
    True only when that type is reported as "cgroup2fs".
    """
    if not init_is_systemd():
        return False

    probe = subprocess.run(["stat", "-fc", "%T", "/sys/fs/cgroup"],
                           capture_output=True,
                           encoding="utf-8",
                           errors="ignore")
    filesystem_type = probe.stdout.strip()
    return filesystem_type == "cgroup2fs"


def usage(code):
    """Print the help text and terminate the program.

    code -- the exit status.  With 0 the help goes to stdout (the user
            asked for it); with any other value it goes to stderr.

    This function never returns: it always ends in sys.exit(code).
    """
    msg="""Usage: checkrestart [OPTIONS]
Shows processes using deleted files.
Options:
 -h, --help                   Show this message and exit
 -f, --show-files             Show the deleted files
 -v, --verbose                Show more information
 -d, --debug                  Show extensive information (for debugging)
 -t, --terse                  Show only the number of processes needing
                               a restart
 -m, --machine                Produce machine-readable output

 -p, --packages               Only look for files from a package
 -a, --all                    Report all deleted files, regardless of
                               location. Cancels all uses of -x

 -n, --no-lsof                Do not use lsof to find deleted files
 -b, --blocklistfile FILE     Ignore files matching patterns in FILE
 -x, --exclude TYPE:REGEXP    Ignore MATCH (a python regexp).
  The argument can be:

   package:xxx                ~ ignore processes from package xxx
   program:/path/to/program   ~ ignore program
   pid:N                      ~ ignore process with PID N
   file:/path/to/file         ~ ignore anything using file
   unit:s.service             ~ ignore all processes started by s.service

   dangerous-unit:s.service   ~ Advise caution before restarting s.service
                                (processes are still reported, but the
                                command is shown as commented out)

   suggested-unit:s.service   ~ do not guess that s can restart anything
                                 (only applies to programmes not started
                                  by any unit)

   dangerous-initscript:x     ~ advise caution before restarting the
                                  initscript x (processes are still
                                  reported but the command is shown as
                                  commented out)

   suggested-initscript:x     ~ do not suggest /etc/init.d/x can restart
                                  anything

    Everything after the ':' is a python regexp. 'file:' can be omitted
    The last four only affect what commands are shown, not what is reported

 -i, --exclude-package PACKAGE      Same as -x package:PACKAGE
     --exclude-program PATH         Same as -x program:PATH
 -e, --exclude-pid PID              Same as -x pid:PID
     --exclude-file PATH            Same as -x file:PATH
     --exclude-unit UNIT            Same as -x unit:UNIT
     --dangerous-unit UNIT          Same as -x dangerous-unit:UNIT
     --dont-suggest-unit U          Same as -x suggested-unit:U
     --dangerous-initscript X       Same as -x dangerous-initscript:X
     --dont-suggest-initscript X    Same as -x suggested-initscript:X
"""
    if code==0:
        print(msg)
    else:
        sys.stderr.write(msg+'\n')
    sys.exit(code)

def _parse_excludes(a,exclude,suggestions):
    """File the pattern a under exclude or suggestions, based on its prefix.

The prefixes that go into exclude are tried first, then the prefixes
that go into suggestions; the first prefix that matches wins.
If a carries none of the known prefixes it is treated as 'file:'."""
    routing = (
        (("package:", "program:", "pid:", "file:", "unit:"), exclude),
        (("suggested-unit:", "dangerous-unit:",
          "suggested-initscript:", "dangerous-initscript:"), suggestions),
    )
    for prefixes, target in routing:
        # any() stops at the first prefix that _add_exclude accepts
        if any(_add_exclude(prefix, a, target) for prefix in prefixes):
            return None

    # no recognised prefix: assume the argument is a file pattern
    _add_exclude("file:", f"file:{a}", exclude)


def _add_exclude(prefix,s,exclude):
    """Compile the part of s after prefix and store it in exclude[prefix].

Nothing happens (and False is returned) unless s starts with prefix.
Otherwise the remainder of s is compiled as a python regexp and
appended to exclude[prefix], and True is returned.
A regexp that does not compile is reported on stderr and the program
exits with status 3."""
    if not s.startswith(prefix):
        return False

    pattern_text = s[len(prefix):]
    if DEBUG: print(f"Adding exclusion: {prefix} {pattern_text}")
    try:
        compiled = re.compile(pattern_text)
        exclude[prefix].append(compiled)
    except re.error as e:
        sys.stderr.write(f"ERROR: syntax error in '{prefix}' regexp {repr(pattern_text)}: {e}\n")
        sys.exit(3)
    return True

def main():
    """Parse options, find processes using deleted files, filter, group, and output how to restart

    Outline:
      1. parse the command line and read the blocklist files
         (the defaults live in /etc/checkrestart);
      2. collect the processes using deleted files (via lsof, or the
         alternative mechanism when lsof is unavailable or -n is given);
      3. drop ignored programs, pids, units, files and packages;
      4. group what is left by systemd unit or by package;
      5. report whether a reboot is needed, how many processes need a
         restart and (unless -t) suggested restart commands.

    This function always terminates the program through sys.exit():
      0 -- nothing needs restarting and no reboot is needed
      1 -- something needs restarting or a reboot is needed
      3 -- bad options, a bad regexp or an unreadable file
    """
    # lc_all_c_env is used by calls to dpkg-query (it does not really need to be a global variable)
    # Note: this is os.environ itself, not a copy, so LC_ALL=C is also
    # set in this process's own environment.
    global lc_all_c_env
    lc_all_c_env = os.environ
    lc_all_c_env['LC_ALL'] = 'C'

    useLsof = True

    using_systemd=init_is_systemd()
    using_cgroups2=cgroups2()

    # exclude is a dict of prefix=>[Regexp] prefix corresponds to the
    # start of an argument to -x (see man page or usage). For example,
    # exclude["pid:"] will exclude by pid.
    exclude=collections.defaultdict(list)

    # like exclude but suggested and dangerous - ie things not reset by -a
    suggestions=collections.defaultdict(list)
    # added as prefix for commands that are suggested to be dangerous
    caution="# CAUTION: "

    # Only report deleted files that were installed by a .deb package
    onlyPackageFiles = False

    # cache of exclusions: dict of prefix=>dict(what=>Bool)
    # used to cache whether a package, unit, etc is ignored or not-to-be-suggested or dangerous
    cache=collections.defaultdict(dict)

    # files from which to read extra entries for ignored*
    ignorelistFiles = []

    # default settings (files in /etc/checkrestart)
    default_exclude=["local-exclude.conf","exclude.conf"]
    default_suggestions=["local-suggestions.conf","suggestions.conf"]

    # Parse options
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   "hfvdtmpanb:x:e:i:",
                                   ["help",
                                    "show-files",
                                    "verbose", "debug", "terse","machine",
                                    "no-lsof","nolsof",
                                    "packages", "all",
                                    "blocklistfile=","blacklist=",
                                    "exclude=",
                                    "exclude-package=","ignore=",
                                    "exclude-unit=", "exclude-program=",
                                    "exclude-pid=","excludepid=","exclude-file=",
                                    "dangerous-unit=",
                                    "dont-suggest-unit=",
                                    "dangerous-initscript=",
                                    "dont-suggest-initscript=",
                                    "no-systemd","no-cgroups2",
                                    ])
    except getopt.GetoptError as err:
        # eg: "option -y not recognized"
        sys.stderr.write(f"{err}\n")
        usage(3)

    ## output
    # DEBUG is used in many functions so is left global (-d option),
    # although it would be better to pass it around explicitly
    global DEBUG
    DEBUG=False

    # verbose output (-v option) explains why things are ignored.
    verbose = False
    # Generate terse (one-line) output, disabled by default (-t option)
    terseOutput = False
    # Generate machine parsable output, disabled by default (-m option)
    machineOutput = False

    # Whether to list the (non-ignored) deleted files (-f option)
    showfiles=False

    for o, a in opts:
        if DEBUG: print(f"Option {o}; arg: {a}")
        if o in ("-h", "--help"):
            usage(0)
        elif o in ("-f","--show-files"):
            showfiles=True
        elif o in ("-v", "--verbose"):
            verbose = True
            showfiles=True
        elif o in ("-d", "--debug"):
            DEBUG=True
            verbose=True
            showfiles=True
        elif o in ("-t", "--terse"):
            terseOutput = True
        elif o in ("-m", "--machine"):
            machineOutput = True
        elif o in ("-p", "--packages"):
            onlyPackageFiles = True
        elif o in ("-a", "--all"):
            # forget every exclusion seen so far and do not load the
            # default exclude files (suggestions are kept)
            exclude=collections.defaultdict(list)
            ignorelistFiles=[]
            default_exclude=[]
        elif o in ("-n", "--no-lsof","--nolsof"):
            useLsof = False
        elif o in ("-b", "--blocklistfile","--blacklist"):
            ignorelistFiles.append(a)
        elif o in ("-x","--exclude"):
            _parse_excludes(a,exclude,suggestions)
        elif o in ("-i", "--exclude-package","--ignore"):
            _add_exclude("package:",f"package:{a}",exclude)
        elif o in ("--exclude-program",):
            _add_exclude("program:",f"program:{a}",exclude)
        elif o in ("-e", "--exclude-pid","--excludepid"):
            # "--excludepid" is accepted by getopt above, so it has to be
            # handled here or it would reach the INTERNAL ERROR branch
            _add_exclude("pid:",f"pid:{a}",exclude)
        elif o in ("--exclude-file",):
            _add_exclude("file:",f"file:{a}",exclude)
        elif o in ("--exclude-unit",):
            _add_exclude("unit:",f"unit:{a}",exclude)
        elif o in ("--dangerous-unit",):
            _add_exclude("dangerous-unit:",f"dangerous-unit:{a}",suggestions)
        elif o in ("--dont-suggest-unit",):
            _add_exclude("suggested-unit:",f"suggested-unit:{a}",suggestions)
        elif o in ("--dangerous-initscript",):
            _add_exclude("dangerous-initscript:",f"dangerous-initscript:{a}",suggestions)
        elif o in ("--dont-suggest-initscript",):
            _add_exclude("suggested-initscript:",f"suggested-initscript:{a}",suggestions)
        elif o in ("--no-systemd",):
            # undocumented: debug only
            using_systemd=False
            using_cgroups2=False
        elif o in ("--no-cgroups2",):
            # undocumented: debug only
            using_cgroups2=False
        else:
            sys.stderr.write(f"INTERNAL ERROR: Unhandled option: {o}\n")
            usage(3)

    checkroot()


    # default files are only used if they exist; files named with -b must be readable
    load_default=default_exclude+default_suggestions
    if DEBUG: print(f"Default blocklist: {load_default}")
    for f in load_default:
        f=f"/etc/checkrestart/{f}"
        if os.path.exists(f):
            if DEBUG: print(f"Using default blocklist: {f}")
            ignorelistFiles.append(f)

    for f in ignorelistFiles:
        if DEBUG: print(f"Reading blocklist file: {f}")
        try:
            with open(f, 'r') as ignorelistFile:
                for line in ignorelistFile.readlines():
                    a=line.rstrip()
                    # skip comments and blank lines
                    if a.startswith("#") or a=="":
                        continue
                    _parse_excludes(a,exclude,suggestions)
        except OSError as e:
            sys.stderr.write(f"ERROR: Could not read file: {f}: {e}\n")
            sys.exit(3)


    if DEBUG:
        print(f"Exclusions: {exclude}")
        print(f"Excluded from suggestions: {suggestions}")

    if DEBUG:
        print(f"Systemd: {using_systemd}")
        print(f"cgroups v2: {using_cgroups2}")

    # Check if we have lsof, if not, use an alternative mechanism
    if not shutil.which('lsof'):
        useLsof=None
        if verbose:
            sys.stderr.write("WARNING: lsof is not available. Using alternative mechanism to find processes using deleted files.\n")
    if not useLsof:
        using_deleted = procfilescheck(exclude["pid:"],using_cgroups2)
    else:
        using_deleted = lsoffilescheck(using_cgroups2)

    if verbose:
        if machineOutput:
            print(f"PROCESSES_ALL: {len(using_deleted)}")
        else:
            print(f"(Found {len(using_deleted)} process(es) using deleted files)")

    # 1. filter out ignored programs, pids, units, and files
    # 2. Group processes by program (name of executable) or unit
    n=0 # number of non-ignored processes
    programs=collections.defaultdict(list) # program_name => [processes]
    units_that_need_restart={} # unit_name => SystemdUnit

    for process in using_deleted:
        if is_ignored(process.program,exclude,"program:",onlyPackageFiles,cache):
            show_ignore(f"program {process.program}",
                        "program", process, verbose, machineOutput)
        elif is_ignored(str(process.pid),exclude,"pid:",False,cache):
            show_ignore(f"PID {process.pid}",
                        "pid", process, verbose, machineOutput)
        elif process.systemd_unit and is_ignored(process.systemd_unit,exclude,"unit:",False,cache):
            # 4th arg to is_ignore must be False, as some units, eg: apache2, are generated from initscripts
            show_ignore(f"process started by unit {process.systemd_unit}",
                        f"unit\t{process.systemd_unit}",
                        process, verbose, machineOutput)
        else:
            interesting_files=[]
            for f in process.files:
                if is_ignored(f, exclude,"file:",onlyPackageFiles,cache):
                    show_ignore(f"deleted file {f}",
                                f"file\t{f}",
                                process, verbose, machineOutput)
                else:
                    interesting_files.append(f)

            if len(interesting_files)>0:
                n=n+1
                process.files=sorted(interesting_files)

                unit=process.systemd_unit
                if unit:
                    if verbose:
                        if machineOutput:
                            print(f"from_unit\t{process}\t{unit}")
                        else:
                            print(f"{process} was started by systemd unit: {unit}")
                    if units_that_need_restart.get(unit):
                        units_that_need_restart[unit].processes.append(process)
                    else:
                        u=SystemdUnit(unit)
                        if u.restart_command:
                            units_that_need_restart[unit]=u
                            units_that_need_restart[unit].processes.append(process)
                        else:
                            # no known restart command: fall back to grouping by program
                            if DEBUG: print(f"{process} was started by systemd. but we do not know how to restart it)")
                            programs[process.program].append(process)
                else:
                    programs[process.program].append(process)
            else:
                # all files have been excluded, so hide the process
                show_ignore("all files from","all_files",process,verbose,machineOutput)

    # list of SystemdUnit
    systemdUnits=sorted(units_that_need_restart.values(),key=lambda p:p.cgroup_path)


    # Group programs by package. programs is modified so that anything
    # packaged is deleted - anything remaining in programs is
    # unpackaged.
    packages = group_by_package(programs)

    if verbose:
        for packagename,package in packages.items():
            for process in package.processes:
                if machineOutput:
                    print(f"from_package\t{process}\t{packagename}")
                else:
                    print(f"{process} is part of package: {packagename}")

    for program_name,processes in programs.items():
        if verbose:
            # report each process of this program (not whatever 'process'
            # happened to be left over from an earlier loop)
            for process in processes:
                if machineOutput:
                    print(f"unpackaged\t{process}")
                else:
                    print(f"{process} is not part of any package or systemd unit")
        name=f"Unpackaged: {os.path.basename(program_name)}" # todo - this does conflate /bin/sh and /local/bin/sh
        packages.setdefault(name,Package(name,is_fake=True))
        packages[name].processes.extend(processes)

    sys.stdout.flush()

    # Remove ignored packages - modify n accordingly
    interesting_packages=[]
    for p in packages:
        if is_ignored(p,exclude,"package:",False,cache):
            num_ignored=len(packages[p].processes)
            n=n-num_ignored
            show_ignore(f"all ({num_ignored}) programs from package", f"package\t{num_ignored}", p,
                        verbose,machineOutput)
        else:
            interesting_packages.append(packages[p])

    packages=sorted(interesting_packages,key=lambda p:p.name)

    # check if reboot needed
    flag="/run/reboot-required"
    pkgs=f"{flag}.pkgs"
    if os.path.exists(pkgs):
        reboot_needed=True
        if machineOutput:
            msg=["REBOOT_NEEDED: Yes"]
        else:
            msg=["The system needs rebooting"]

        if not terseOutput:
            try:
                with open(pkgs,"r") as f:
                    if machineOutput:
                        msg.extend([f"REQUESTED_REBOOT: {package.strip()}" for package in f.readlines()])
                    else:
                        msg[0]+=". The following packages have indicated that a reboot is required:"
                        msg.extend([f"\t{package.strip()}" for package in f.readlines()])
                        msg.append("")
            except OSError as e:
                sys.stderr.write(f"ERROR: File exists but is unreadable: {pkgs}: {e}\n")
                sys.exit(3)
    elif os.path.exists(flag):
        reboot_needed=True
        if machineOutput:
            msg=[f"REBOOT_NEEDED: Yes; MISSING: {pkgs}"]
        else:
            msg=[f"The system needs rebooting: ({flag} exists but there is no {pkgs}, so we do not know why)"]
    else:
        reboot_needed=False
        msg=[]

    if machineOutput:
        msg.append(f"RESTART_NEEDED: {n}")
    else:
        msg.append(f"{n} non-ignored program(s) or unit(s) need restarting (see checkrestart(8))")
    sys.stdout.flush()


    if terseOutput:
        print('; '.join(msg))
    else:
        print('\n'.join(msg))

    if n==0 and not reboot_needed:
        sys.exit(0)

    if terseOutput:
        # use Nagios exit codes: 0 OK, 1 warning, (2 critical, 3 unknown)
        sys.exit(1)

    # else, we are not in terse mode (no -t) and so have more output to produce
    systemdCommands=[]       # [String] - commands to do the restart of units
    for s in systemdUnits:
        if is_ignored(s.name,suggestions,"dangerous-unit:",False,cache):
            prefix=caution
        else:
            prefix=""
        systemdCommands.append(f"{prefix}{s.restart_command}")

    if not using_cgroups2:
        search_packages_for_suggestions(packages,using_systemd)

    restartWithSystemd=[]    # [Package] - suggested units to restart
    restartWithInitscript=[] # [Package] - suggested initscripts to restart
    initCommands=[]          # [String]  - suggested initscript commands

    restartOther=[]          # [Package] - no unit/initscript

    for package in packages:
        # done becomes True once the package has been put in one of the
        # restartWith* lists
        done=False
        if len(package.systemdservice)>0:
            if DEBUG: print(f"Maybe suggesting restart via systemd for package {package.name}")
            for s in sorted(package.systemdservice):
                if is_ignored(s,suggestions,"suggested-unit:",False,cache):
                    if verbose:
                        if machineOutput:
                            print(f"not_suggesting_unit\t{s}\t{package.name}")
                        else:
                            print(f"Not suggesting unit {s} (from package: {package.name})")
                else:
                    if not done:
                        restartWithSystemd.append(package)
                        done=True

                    if is_ignored(s,suggestions,"dangerous-unit:",False,cache):
                        prefix=caution
                    else:
                        prefix=""
                    systemdCommands.append(f"{prefix}systemctl try-restart {s} # suggested - from package:{package.name}")

            if not done:
                if verbose and not machineOutput:
                    print(f"All units that might restart programs in package {package.name} have been excluded")
        if (not done) and len(package.initscripts) > 0:
            if DEBUG: print(f"Maybe suggesting restart via initscript for package {package.name}")
            for s in sorted(package.initscripts):
                if is_ignored(s,suggestions,"suggested-initscript:",False,cache):
                    if verbose:
                        if machineOutput:
                            print(f"not_suggesting_initscript\t{s}\t{package.name}")
                        else:
                            print(f"Not suggesting initscript {s} (from package: {package.name})")
                else:
                    # only initscripts that were not excluded are suggested
                    # (same structure as the systemd unit loop above)
                    if not done:
                        restartWithInitscript.append(package)
                        done=True

                    if is_ignored(s,suggestions,"dangerous-initscript:",False,cache):
                        prefix=caution
                    else:
                        prefix=""
                    initCommands.append(f"{prefix}service {s} restart # suggested - from package:{package.name}")
            if not done:
                if verbose and not machineOutput:
                    print(f"All initscripts that might restart programs in package {package.name} have been excluded")

        if not done:
            restartOther.append(package)



    if not machineOutput:
        show_processes("The following systemd units started programmes that are using deleted files:",systemdUnits,showfiles)
        show_processes("The following packages provided processes that are using deleted files and might be restartable via systemd:",
                       restartWithSystemd,showfiles)
        show_processes("The following packages provided processes that are using deleted files and might be restarted by an initscript:",
                       restartWithInitscript,showfiles)
        show_processes("The following are using deleted files but there is no suggested way to restart them:",
                       restartOther,showfiles)

        if len(systemdCommands)>0:
            print("\n\nSystemd commands:")
            print('\n'.join(systemdCommands))
            print()

        if len(initCommands)>0:
            print("\n\nInit script commands:")
            print('\n'.join(initCommands))
            print()
    else:
        show_processes_Machine("SYSTEMD",
                               systemdUnits,showfiles)

        show_processes_Machine("PACKAGE_SYSTEMD",
                               restartWithSystemd,showfiles)
        show_processes_Machine("SERVICE",
                               restartWithInitscript,showfiles)
        show_processes_Machine("OTHER",restartOther,showfiles)
        for c in systemdCommands:
            print(f"SYSTEMD_COMMAND\t{c}")
        for c in initCommands:
            print(f"SERVICE_COMMAND\t{c}")

    # use Nagios exit codes: 0 OK, 1 warning, (2 critical, 3 unknown)
    sys.exit(1)
    # end of main()

def show_processes(text,packages,showfiles):
    """Print a human-readable section listing the processes of each package.

    text      -- heading printed (after a blank line) before the list
    packages  -- objects with a .name and a .processes list; nothing at
                 all is printed when this is empty
    showfiles -- when true, each process is followed by its .files

    Processes are listed in order of (program, pid).
    """
    if len(packages) == 0:
        return

    print()
    print(text)
    for package in packages:
        print(f"{package.name}:")
        ordered = sorted(package.processes, key=lambda proc: (proc.program, proc.pid))
        for process in ordered:
            if not showfiles:
                print(f"\t{process}")
                continue
            print(f"\t{process} is using the following deleted files:")
            for deleted in process.files:
                print(f"\t\t{deleted}")
            print()
        print()

def show_processes_Machine(what,packages,showfiles):
    """Print the processes of each package as tab-separated records.

    what      -- tag placed in the first column of every process line
    packages  -- objects with a .name and a .processes list
    showfiles -- when true, each process line is followed by one
                 "file" line per entry of the process's .files

    Processes are listed in order of (program, pid).
    """
    for package in packages:
        ordered = sorted(package.processes, key=lambda proc: (proc.program, proc.pid))
        for process in ordered:
            process_repr = repr(process)
            print(f"{what}\t{package.name}\t{process_repr}")
            if not showfiles:
                continue
            for deleted in process.files:
                print(f"file\t{deleted}\t{repr(process)}")

def dpkg_query(what,args,clean):
    """Run "dpkg-query what args" and yield its stdout one line at a time.

what  -- the dpkg-query action, eg "--search"
args  -- the remaining arguments
clean -- if true, args are first passed through cleanPathsForDpkgSearch
         (working around lack of support for usrmerge)

stderr of dpkg-query is only shown when DEBUG is set.
This is a generator, so nothing is run until the caller starts iterating.
"""
    query_args = cleanPathsForDpkgSearch(args) if clean else args
    command = ["dpkg-query", what]
    command.extend(query_args)
    if DEBUG:
        print(f"Running: {repr(command)}")

    finished = subprocess.run(command,
                              capture_output=True,
                              encoding="utf-8",
                              errors="ignore",
                              env = lc_all_c_env)
    if DEBUG:
        for err_line in finished.stderr.splitlines():
            print(f"dpkg-query: stderr: {repr(err_line)}")

    for out_line in finished.stdout.splitlines():
        if DEBUG: print(f"dpkg-query: {repr(out_line)}")
        yield out_line

def group_by_package(programs):
    """Group programs by debian package

programs is a dict of program path => [processes].

This function returns a dict of name=>Package where the package
ships the program. Each Package's .processes is set to the
processes that were in programs. Programs is modified to delete
anything found - so any unpackaged programs will be left in programs

The lookup is done with a single "dpkg-query --search" over all the
program paths. Lines of its output that are not of the form
"<package>: <path>" (or a "diversion by" line) are skipped.
"""
    packages={}    # package name=>Package
    diverted = None

    if len(programs)==0:
        return packages

    if DEBUG:
        print(f"Looking for packages providing: {repr(programs.keys())}")
    for line in dpkg_query("--search",programs.keys(),clean=True):
        if line.startswith('local diversion'):
            continue
        if ':' not in line:
            continue

        m = re.search(r"^diversion by (\S+) (from|to): (.*)$", line)
        if m:
            if m.group(2) == 'from':
                # remember the diverted path; the matching 'to' line
                # names the package that now provides it
                diverted = m.group(3)
                continue
            if not diverted:
                raise Exception(f"checkrestart: Internal error while handling diversion in line {repr(line)}")
            packagename, program = m.group(1), diverted
        else:
            # Split on the first ': ' only, so that a path which itself
            # contains ': ' does not break the unpacking
            packagename, separator, program = line.partition(': ')
            if not separator:
                # the line has a ':' but is not "<package>: <path>"
                continue
            if program == diverted:
                # dpkg-query (sometimes) prints a summary line after the
                # two "diversion by" lines naming both
                # packages involved in the diversion, eg
                # "mutt-patched, mutt: /usr/bin/mutt"
                # (another example is help.txt from vim-tiny and vim-runtime)
                continue

            if not programs.get(program):
                # Some complications due to usrmerge:
                # eg: bash    ships '/bin/bash' but scripts may run as '/usr/bin/bash'
                # eg: python3 ships '/usr/bin/python3' but scripts may run as '/bin/python3'
                added_links=add_usrmerge_symlinks(program)
                if programs.get(added_links):
                    # /usr/bin/prog packaged but /bin/prog running
                    if DEBUG: print(f"program found after symlinks added: packaged: {program} -> running: {added_links}")
                    program=added_links
                else:
                    removed_links=remove_usrmerge_symlinks(program)
                    if programs.get(removed_links):
                        # /bin/prog packaged but /usr/bin/prog running
                        if DEBUG: print(f"program found after symlinks removed: packaged: {program} -> running: {removed_links}")
                        program=removed_links

        # The next 'if' is needed in rare circumstances: Suppose a
        # container shares its /proc with its host system and that
        # the host is usrmerged and the container is not: a
        # checkrestart in the container could detect /usr/bin/prog
        # running outside, But the key in programs could be
        # /bin/prog if that is what is packaged (The best outcome
        # is to consider such a process as unpackaged, hence the
        # 'if' below)
        if programs.get(program):
            if DEBUG: print(f"program {program} found in package: {packagename}")
            packages.setdefault(packagename,Package(packagename))
            packages[packagename].processes.extend(programs[program])
            del programs[program]

    return packages


def search_packages_for_suggestions(packages,using_systemd):
    """Fill in .systemdservice and .initscripts for each real package.

    Only used if not using cgroups v2.

    The files shipped by each package (dpkg-query --listfiles) are
    scanned for systemd service files (only when using_systemd is true)
    and for scripts in /etc/init.d/.  If neither is found, an initscript
    named after one of the package's running programs is looked for.
    Packages with .is_fake set are left alone.
    """
    unit_dirs = ('/lib/systemd/system/', '/usr/lib/systemd/system/')
    initd = '/etc/init.d/'

    for package in packages:
        if package.is_fake:
            continue

        for path in dpkg_query("--listfiles",[package.name],
                               clean=False):
            # If running on a systemd system, extract the systemd's service files from the package
            is_service_file = (using_systemd
                               and path.startswith(unit_dirs)
                               and path.endswith('.service')
                               and '.wants' not in path)
            if is_service_file:
                # template units (name@.service) are never suggested
                if not path.endswith('@.service'):
                    unit_name=os.path.basename(path)
                    if DEBUG: print(f"Found suggested systemd unit {path} for {package.name}")
                    package.systemdservice.add(unit_name)

            # find initscripts
            elif path.startswith(initd):
                if DEBUG: print(f"Found suggested init script {path} for {package.name}")
                package.initscripts.add(path.removeprefix(initd))

        # Alternatively, find init.d scripts that match the process name
        nothing_found = len(package.initscripts) == 0 and len(package.systemdservice) == 0
        if not nothing_found:
            continue
        for process in package.processes:
            script = os.path.basename(process.program)
            if os.path.exists(f"/etc/init.d/{script}"):
                if DEBUG: print(f"Found suggested init script {script} for {package.name} using name of process {process.program}")
                package.initscripts.add(script)


def lsoffilescheck(using_cgroups2):
    """Use lsof(1) to find processes using deleted files

using_cgroups2: passed through to each Process that is created.

returns: [Process] where each entry is using a deleted file (the
paths are collected in the 'files' set of each Process)."""
    processes = {}
    # Process described by the most recent 'p' line (None until one is seen)
    process = None

    # not sure if this is needed on modern kernels?
    path_inode_regexp=re.compile(r"\(path inode=[0-9]+\)$")

    # -X ~ exclude network files
    # -L ~ include all link counts
    # -F pfkn0 ~ show fields n (filename), f(descriptor), p(pid), k(no. links), 0~NUL-separated
    # -w ~ hide warnings (fix #99150)
    lsof_output = subprocess.run(['lsof', '+XL', '-F', 'pfkn0', '-w'],
                                 capture_output=True,
                                 encoding="utf-8",
                                 errors="ignore",
                                 env = lc_all_c_env).stdout

    # Split on newline only: str.splitlines() would also split on other
    # characters (form feed, unicode line separators, ...)
    for line in lsof_output.split('\n'):
        if line.endswith('\0'):
            line=line[:-1] # strip trailing NUL
        if not line:
            # blank line (eg: after the final newline)
            continue

        fieldset=line[0]
        if fieldset=="p":
            # Info about process: p<PID>
            try:
                pid=int(line.split('\0', 1)[0][1:])
            except ValueError:
                print(f"Unknown fieldset from lsof: {repr(line)}")
                continue
            process=processes.get(pid) # Process or None
            if not process:
                if DEBUG: print(f"Creating Process object for PID {pid}")
                process=Process(pid,using_cgroups2)
                processes[pid]=process
            else:
                # lsof can produce multiple fieldsets for the same pid
                # - possibly one per task (thread) (The '-K i' option
                # to lsof may prevent this, but unclear what that
                # hides)
                if DEBUG: print(f"lsof produced duplicate data for PID {pid} - merging")
        elif fieldset=="f":
            # Info about a file used by process: f<fd><NUL>k<links><NUL>n<PATH><NUL>
            if process is None:
                # file data before any process data: nothing to attach it to
                print(f"Unknown fieldset from lsof: {repr(line)}")
                continue
            fd=""
            filename=""
            links=-1
            for f in line.split('\0'):
                if f.startswith("f"):
                    fd=f[1:]
                elif f.startswith("k"):
                    try:
                        links=int(f[1:])
                    except ValueError:
                        links=-1
                elif f.startswith('n'):
                    # n/path/to/file or n[signalfd] etc
                    filename=f[1:]
                    # If the name starts with (deleted) put it in the end of the
                    # file name, this is used to workaround different behaviour in
                    # OpenVZ systems, see
                    # https://bugzilla.openvz.org/show_bug.cgi?id=2932
                    if filename.startswith('(deleted)'):
                        filename = filename[9:] + ' (deleted)'
                    elif filename.startswith(' (deleted)'):
                        filename = filename[10:] + ' (deleted)'
                else:
                    print(f"Unknown field {f} from lsof in pid {process.pid}: {repr(line)}")
            if filename.startswith('/'):
                if filename.endswith(" (deleted)"):
                    filename=filename[:-10]
                    if DEBUG: print(f"lsof reported that PID {process.pid} is using deleted file {filename}; lsof data was: {repr(line)}")
                    process.files.add(filename)
                elif path_inode_regexp.search(filename):
                    if DEBUG: print(f"lsof reported that PID {process.pid} is using 'path inode' file {filename}; lsof data was: {repr(line)}")
                    filename=path_inode_regexp.sub("",filename)
                    process.files.add(filename)
                elif links==0 or fd=="DEL":
                    # no "(deleted)" marker, but no links left / deleted mapping
                    if DEBUG: print(f"lsof reported that PID {process.pid} is using deleted file {filename} (detected via fd={fd},links={links}); lsof data was: {repr(line)}")
                    process.files.add(filename)
        else:
            print(f"Unknown fieldset from lsof: {repr(line)}")

    return [process for process in processes.values() if len(process.files)>0]

def procfilescheck(ignoredpids,using_cgroups2):
    """Use the /proc file system to determine processes that are using deleted files.

ignoredpids: PIDs (int, or str of digits) that are silently ignored
using_cgroups2: passed through to each Process that is created

returns: [Process] where each entry is using a deleted file, found either
via /proc/<pid>/fd or via the memory maps reported by pmap(1).
(This sometimes finds fewer deleted files than lsoffilescheck)"""
    processes = []
    deleted_regexp = re.compile(r"\s\(deleted\)$")

    # Entries of /proc are strings, so compare PIDs as strings: this
    # works whether the caller passed ints or strings
    ignored = {str(ignoredpid) for ignoredpid in ignoredpids}
    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

    for pid in pids:
        if pid in ignored:
            continue

        # Get the list of open files for this process from /proc We
        # ignore failures because links may disappear while we run
        foundfiles = set()
        try:
            for fd in os.listdir('/proc/' + pid + '/fd'):
                if os.path.islink('/proc/' + pid + '/fd/' + fd):
                    fname = os.readlink('/proc/' + pid + '/fd/' + fd)
                    if deleted_regexp.search(fname):
                        if DEBUG: print(f"/proc/pid/fd shows PID {pid} using deleted file {fname}")
                        # drop the trailing " (deleted)"
                        foundfiles.add(fname[:-10])
        except OSError:
            continue

        # Get the list of memory mapped files using pmap
        for output in subprocess.run(['pmap', '--show-path', pid],
                                     capture_output=True,
                                     encoding="utf-8",
                                     errors="ignore",
                                     env = lc_all_c_env).stdout.splitlines():
            # the mapped path is whatever follows the first three columns
            data = re.split(r"\s+", output, 3)
            if len(data) == 4:
                f = data[3]
                if deleted_regexp.search(f):
                    if DEBUG: print(f"pmap reported PID {pid} using deleted file {repr(f)}")
                    foundfiles.add(f[:-10])

        if foundfiles:
            process=Process(int(pid),using_cgroups2)
            process.files = foundfiles
            processes.append(process)

    return processes


def ispackagedFile(f):
    """Check if file f (path) is part of a debian package

A library under /lib/ or /usr/lib/ that no longer exists may have been
upgraded to a new soname, so it is looked up with its version suffix
replaced by a wildcard (libfoo.so.1.2 -> libfoo.so.*).

Returns:
 - False - file does not exist or is not part of a debian package (according to dpkg-query --search)
 - True  - file is found in an operating system package
"""
    soname_regexp=re.compile(r"\.so[\d.]+$")
    # Only set when f had to be turned into a wildcard: matches the
    # lines of dpkg-query output that are acceptable for that wildcard
    wildcard_regexp = None
    if DEBUG: print(f"Checking if file {f} belongs to any package")
    # First check if the file exists
    if not os.path.exists(f):
        if not f.startswith(('/lib/', '/usr/lib/')):
            if DEBUG:
                print(f"File {f} is deleted but is not a library: assuming it is not part of a package")
            return False

        # A library that does not exist may have been upgraded to a new soname
        if DEBUG: print(f"{f} is a deleted library")
        stem, had_soname = soname_regexp.subn("", f)
        if had_soname:
            # dpkg-query takes a glob, but we need a regular expression to
            # check its output: build the regexp from the escaped path so
            # that names such as libstdc++.so.6 are handled
            f = stem + ".so.*"
            wildcard_regexp = re.compile(": " + re.escape(stem) + r"\.so\..*$")

    # If it exists (or is a deleted library), run dpkg-query
    file_in_package = False
    package = None
    for line in dpkg_query("--search", [f],clean=True):
        if line.endswith(f": {f}") or (wildcard_regexp is not None and wildcard_regexp.search(line)):
            package = line.split(":")[0]
            file_in_package = True
            break

    if DEBUG:
        if file_in_package:
            print(f"File {f} belongs to package: {package}")
        else:
            print(f"File {f} does not belong to any package")

    return file_in_package

def is_ignored(f, ignorelist,key,onlyPackageFiles,cached):
    """Decide whether f should be left out of the report.

f: the name to check
ignorelist: maps key to a list of compiled regexps
key: selects both the list of regexps and the cache to use
onlyPackageFiles: (the '-p' option) if set, anything that is not part of
  a package is ignored too
cached: maps key to a dict of earlier answers; it is updated here

Returns:
 - True if f matches a regexp in ignorelist[key], or if
   onlyPackageFiles is set and f is not a packaged file
 - False otherwise."""
    answers=cached[key]
    cache_hit=answers.get(f)
    if cache_hit is not None:
        if DEBUG: print(f"Cache hit: {key} {f}: ignored={cache_hit}")
        return cache_hit

    # first regexp that matches, if any
    regexp=next((candidate for candidate in ignorelist[key] if candidate.search(f)), None)
    if regexp is not None:
        if DEBUG: print(f"Exclusion found for {key} {f} using: {regexp}")
        answers[f]=True
        return True

    verdict=(not ispackagedFile(f)) if onlyPackageFiles else False
    answers[f]=verdict
    return verdict

def cleanPathsForDpkgSearch(paths):
    """Return paths plus their usrmerge variants, as a set.

Work around for usrmerged systems: dpkg-query --search /usr/bin/bash
does not work, because the bash package only installed /bin/bash (if
dpkg-query is ever fixed then this can be deleted).

Each path is kept as given and is also added with the leading /usr
removed and with it added."""
    variants=set(paths)
    for original in paths:
        variants.update((remove_usrmerge_symlinks(original),
                         add_usrmerge_symlinks(original)))
    return variants

def remove_usrmerge_symlinks(path):
    """Strip the leading /usr from a bin, sbin or lib* directory.

/usr/bin --> /bin and /usr/lib -> /lib etc
- needed on usrmerged systems where dpkg-query does not find the path
  if it does not match the exact name shipped in the package (ie all
  of Debian)
- see /usr/lib/usrmerge/convert-usrmerge for the list of directories
  involved

Any other path is returned unchanged."""
    without_usr_lib=re.sub(r"^/usr/lib(64|32|o32|x32)?/", r'/lib\1/', path)
    return re.sub(r"^/usr/(s)?bin/", r'/\1bin/', without_usr_lib)

def add_usrmerge_symlinks(path):
    """Reverses remove_usrmerge_symlinks: /bin --> /usr/bin, /lib -> /usr/lib etc

Only a leading /bin/, /sbin/ or /lib*/ is rewritten; any other path is
returned unchanged."""
    with_usr_lib=re.sub(r"^/lib(64|32|o32|x32)?/", r"/usr/lib\1/", path)
    return re.sub(r"^/(s)?bin/", r"/usr/\1bin/", with_usr_lib)


def show_ignore(description,what,process,verbose,machineOutput):
    """Report (on stdout) that process is being ignored; silent unless verbose.

description: human-readable reason, used in the normal output
what: short tag, used in the machine-readable output ("ignored_<what>")
process: the thing being ignored; shown with repr() in machine-readable
  output and with str() otherwise
machineOutput: selects the tab-separated machine-readable format"""
    if not verbose:
        return

    if machineOutput:
        message=f"ignored_{what}\t{repr(process)}"
    else:
        message=f"Ignoring {description}: {process}"
    print(message)

class Process:
    """A Process represents a running process

Information that has to be read from /proc (program, cmdline,
systemd_unit) is looked up the first time it is asked for and then
remembered.

Attributes:
 pid    - the process id
 files  - set of (deleted) files in use; filled in by whoever creates
          the Process
 prefix - "" or "Deleted" (only meaningful once 'program' has been read)
 what   - "Program" or "Script" (only meaningful once 'program' has been read)
"""


    # if /usr is a symlink (eg: /usr -> /srv/usr), then we want to replace '/srv/usr/bin/python3'
    # with /usr/bin/python3 so that we can find which package it is in.
    usrsymlinkd = os.path.islink('/usr')
    if usrsymlinkd:
        usr_target = os.path.realpath('/usr')

    def __init__(self, pid,using_cgroups2):
        """
        Process
        # pid is the process id
        # If using_cgroups2 then we use
        # cgroups to identify which unit to restart
        """

        self.pid = pid
        self.files = set()
        self._exe = None     # /proc/pid/exe
        self.prefix=""       # "" or Deleted
        self.what="Program"  # Can be changed to "Script", depending on self.cmdline
        self._program = None # cleaned version of self._exe
        self._cmdline=None   # /proc/pid/cmdline

        self._systemd_unit=None # cgroup of the unit that started the process (None if not known)
        self._using_cgroups2=using_cgroups2

    @property
    def program(self):
        """If the program name (_program) has not been set, infer it from /proc/pid/exe.
If the program is a script, use the first executable non-option argument in /proc/pid/cmdline instead.

As a side effect of the first call this sets self._exe (the target of
/proc/pid/exe before any cleaning), self.prefix and self.what."""
        if self._program is None:
            try:
                self._program = os.readlink(f"/proc/{self.pid}/exe")
            except OSError as e:
                if e.errno != errno.ENOENT:
                    # the link could not be read for a reason other than
                    # it being missing
                    sys.stderr.write(f"WARNING: Failed to read /proc/{self.pid}/exe\n")
                    self._program=f"<unknown: PID: {self.pid}>"
                else:
                    # the link does not exist: possibly the process has
                    # exited (see also proc(5))
                    self._program=f"<Deleted: PID: {self.pid}>"

            # /proc/pid/exe has all kinds of junk in it sometimes
            self._program = self._program.split('\0', 1)[0]
            self._exe=self._program

            # clean self._program
            if Process.usrsymlinkd:
                # if /usr -> /srv/usr we want to report '/usr/bin/python3' not '/srv/usr/bin/python3')
                # Only whole leading path components are replaced: this is a
                # plain prefix test, the target is not a regular expression
                target=Process.usr_target.rstrip('/')
                if self._program.startswith(target + '/'):
                    if DEBUG: print(f"Rewriting path for {self._program} due to symlink /usr -> {Process.usr_target}")
                    self._program = '/usr' + self._program[len(target):]

            self._program,program_deleted=re.subn(r"( \(deleted\)|\.dpkg-new)\s*$",'',self._program)
            if program_deleted > 0:
                if DEBUG: print(f"Program is deleted: {self._program}")
                # self._program should already be in self.files
                self.prefix="Deleted"
            else:
                self.prefix=""

            if re.search(r"^(/usr)?/bin/(perl|python|ruby|tclsh|bash|dash)", self._program):
                if DEBUG: print(f"program {self._program} (PID {self.pid}) is a script")

                # we are a script run by an interpreter such as
                # perl/python/ruby/tclsh, we want to use the script's name

                # Spamd adds a blank to $0, see
                # https://bugzilla.redhat.com/show_bug.cgi?id=755644
                #(...which seems to be closed so is this even needed any more?)
                m = re.search(r"^/usr/sbin/spamd |^spamd ", self.cmdline[0])
                if m:
                    self._program = "/usr/sbin/spamd"
                else:
                    # Skip first entry (the interpreter)
                    for arg in self.cmdline[1:]:
                        # Ignore any options following the interpreter, e.g. python's -O (see #715000)
                        if DEBUG: print(f"Found option {repr(arg)} to {self._program}")
                        if not arg.startswith("-"):
                            arg=shutil.which(arg)
                            if arg is not None:
                                # arg is the name of an executable script (BUG: does not detect deleted scripts)
                                if DEBUG: print(f"PID {self.pid} (a {self._program} script) treated as if named: {arg}")
                                self._program = arg
                                self.what="Script"
                                break
            else:
                self.what="Program"
        return self._program

    @property
    def cmdline(self):
        """Return an array containing the program and any arguments.  Uses
/proc/pid/cmdline so we are trusting the process to set this correctly. see proc(5).  Never
call self.program in this method or you could get an infinite loop.

If the file cannot be read or is empty, the array has the single entry
'Unknown (PID <pid>)'."""
        if self._cmdline is None:
            data=[]
            try:
                # errors="replace": arguments are arbitrary bytes, and must
                # not make us fail with a UnicodeDecodeError
                with open(f"/proc/{self.pid}/cmdline", 'r', errors="replace") as cmdline:
                    # cmdline is NULL-separated, with a trailing NULL per proc(5)
                    data=cmdline.read().split('\0')
                    if not data[-1]: data.pop()
            except OSError:
                data=[]
            if len(data)==0:
                data=[f"Unknown (PID {self.pid})"]
            self._cmdline=data
            if DEBUG: print(f"Set cmdline to: {self._cmdline} for PID:{self.pid}")
        return self._cmdline

    def __str__(self):
        """Human-readable representation
Shows:
 [Deleted] {Program|Script} PATH (PID, [exe], [command line])

exe is hidden if it matches path
command line is hidden if it is the same as PATH
"""
        # nb: calling self.program sets self._exe
        if self.program != self._exe:
            exe=f", EXE: '{self._exe}'"
        else:
            exe=""
        if len(self.cmdline)!=1 or self.program!=self.cmdline[0]:
            args=f", CMDLINE: '{' '.join(self.cmdline)}'"
        else:
            args=""
        if self.prefix!="": # "Deleted" or ""
            sep=" "
        else: sep=""
        return f"{self.prefix}{sep}{self.what} {self.program} (PID: {self.pid}{exe}{args})"

    def __repr__(self):
        """Machine-readable representation.
Tab-separated version of __str__ - but always shows all fields"""
        return f"{self.pid}\t{self.program}\t{self._exe}\t{self.cmdline}\t{self.prefix}\t{self.what}"

    @property
    def systemd_unit(self):
        """Use cgroup v2 to find the systemd unit that started self.

Returns the line of /proc/pid/cgroup that starts with '0::' (eg:
'0::/system.slice/dbus.service'), or None if cgroups v2 are not being
used or no such line could be read."""
        if self._systemd_unit is None and self._using_cgroups2:
            if DEBUG: print(f"Finding unit for {self.program} (PID={self.pid})")
            cgroup_file=f"/proc/{self.pid}/cgroup"
            if os.path.exists(cgroup_file):
                try:
                    # /proc/pid/cgroup contains line:  hierarchy_id:controller_list:cgroup_path
                    # in cgroups2, there is only one hierachy_id, so the file has one line like:
                    #   0::/system.slice/dbus.service\n
                    #   0::/user.slice/user-1000.slice/user@1000.service/app.slice/dbus.service\n
                    #   0::/user.slice/user-1001.slice/user@1001.service/init.scope\n
                    #   0::/machine.slice/machine-unstable\x2damd64.scope/payload/system.slice/console-getty.service
                    #
                    # (in cgroups v1, there are multiple hierachies,
                    # we would want the one called 'name=systemd'
                    # and then the cgroup_path should be as above:
                    # supporting cgroups v1 is therefore possible,
                    # however, this is not implemented.)
                    #
                    # HOWEVER: If you run a container with
                    # systemd-nspawn,
                    # - by default the container uses cgroups v1 (at least when the host is bullseye)
                    # - processes on the host now have 2 lines in their cgroup_file
                    #   + one starting 0 as above
                    #   + one which is just 1:name=systemd:/
                    #   + (this persists even after the container is shutdown)
                    # - processes in the container have
                    #   + line starting 0 - points to machine.slice/machine-<name>.scope/payload/...
                    #   + line starting 1 - gives a path to
                    #      the container's unit
                    # (and if checkrestart is started in such a container: we would detect cgroups v1
                    #   host processes are of course not visible inside the container)
                    # untested: host running cgroups1 with containers running v2
                    with open(cgroup_file, 'r') as f:
                        for line in f:
                            if line.startswith("0::"):
                                self._systemd_unit=line.strip()
                                break
                except OSError as e:
                    # should never happen
                    sys.stderr.write(f"WARNING: unable to read cgroup file: {cgroup_file}: {e}\n")
                    self._systemd_unit=None
            if DEBUG: print(f"program {self.program} (PID {self.pid}) is part of systemd unit: {self._systemd_unit}")
        return self._systemd_unit

class Package:
    """Something that owns running processes.  A Package is either:
- a .deb package, or
- an unpackaged program (in which case is_fake is True)

Attributes:
 name           - name given when the Package was created
 is_fake        - True for an unpackaged program
 initscripts    - set of suggested init scripts (initially empty)
 systemdservice - set of suggested systemd units (initially empty)
 processes      - list of processes belonging to the package (initially empty)"""
    def __init__(self, name, is_fake=False):
        self.name, self.is_fake = name, is_fake
        # restart suggestions, filled in later
        self.initscripts, self.systemdservice = set(), set()
        self.processes = list()


class SystemdUnit:
    """Represents a systemd unit (eg: apache2.service) and the processes it started

Attributes:
 cgroup_path   - the cgroup, split on '/'
 unit_name     - last component of the cgroup (eg: 'x.service')
 unit_slice    - 'system.slice', 'user.slice', ... (for a unit inside a
                 container: the slice within the container)
 user_slice    - 'user-N.slice' for units in user.slice, else None
 machine_scope - 'machine-NAME.scope' for units in machine.slice, else None
 processes     - list, filled in by the caller (initially empty)
"""
    def __init__(self, cgroup):
        """cgroup is a string such as '0::/system.slice/x.service'"""
        self.cgroup_path=cgroup.split("/")
        self.unit_slice=self._path_part(1) or ""
        self.unit_name=self.cgroup_path[-1]

        # always defined, so that the properties below can rely on them
        self.user_slice=None
        self.machine_scope=None

        # properties set on demand
        self._user=None    # set on demand for units in user.slice (ie started by user session managers)
        self._name=None    # set on demand
        self._restart_command=None # set on demand

        self.processes=[]

        if self.unit_slice=="system.slice":
            # ['0::','system.slice','x.service']
            self._machine=".host"
        elif self.unit_slice=="user.slice":
            # eg ['0::','user.slice', 'user-N.slice' 'session-Y.scope','x.service']
            self.user_slice=self._path_part(2)
            self._machine=".host"
        elif self.unit_slice=="machine.slice":
            self.machine_scope=self._path_part(2)
            self._machine=None # set on demand for units started by containers
            inner_slice=self._path_part(4)
            if inner_slice is None:
                # too short to contain a slice inside the container
                if DEBUG: print(f"unexpected cgroup format: {self.cgroup_path}")
            else:
                self.unit_slice=inner_slice
                if inner_slice=="user.slice":
                    # ['0::', 'machine.slice', 'machine-NAME.scope', 'payload', 'user.slice', 'user-N.slice', 'x.service']
                    self.user_slice=self._path_part(5)
                # otherwise, eg:
                # ['0::', 'machine.slice', 'machine-NAME.scope', 'payload', 'system.slice', 'x.service']
        elif self.unit_slice=="init.scope":
            # 0::/init.scope contains init (PID 1)
            self._machine=".host"
        else:
            if DEBUG: print(f"unexpected cgroup format: {self.cgroup_path}")
            self._machine=".host"

    def _path_part(self, index):
        """Return component number index of the cgroup path, or None if the path is too short"""
        if index < len(self.cgroup_path):
            return self.cgroup_path[index]
        return None

    def __repr__(self):
        """human readable name of unit"""
        return self.name

    @property
    def name(self):
        """human readable name of unit"""
        if self._name is None:
            if self.machine==".host":
                in_machine=""
            else:
                in_machine=f" in container: {self.machine}"

            if self.unit_slice=="system.slice":
                suffix=in_machine
            elif self.unit_slice=="user.slice":
                suffix=f" (started by user {self.user}{in_machine})"
            else:
                suffix=f" (in {self.unit_slice}{in_machine})"
            self._name=f"{self.unit_name}{suffix}"

        return self._name

    @property
    def user(self):
        """Id of the user that started the unit, taken from self.user_slice.

If the user slice is missing or does not look like 'user-N.slice' the
result is 'UNKNOWN' and the unit is marked as not restartable."""
        if self._user is None:
            user_slice=self.user_slice
            if user_slice and user_slice.startswith("user-") and user_slice.endswith(".slice"):
                # user-XXX.slice -> XXX
                self._user=user_slice[5:-6]
            else:
                # should not happen
                if DEBUG: print(f"unexpected user slice in cgroup: {self.cgroup_path}")
                self._restart_command=False
                self._user="UNKNOWN"
        return self._user

    @property
    def machine(self):
        """Name of the machine running the unit: '.host', or the name of a
container, which is decoded from self.machine_scope using systemd-escape(1).

If the name cannot be worked out the result is 'UNKNOWN' and the unit
is marked as not restartable."""
        if self._machine is None:
            machine_name=""
            if self.machine_scope:
                try:
                    # machine-unstable\\x2damd64\\x2dsbuild.scope -> unstable-amd64-sbuild
                    machine_name=subprocess.run(['systemd-escape', '--unescape',self.machine_scope[8:-6]],
                                                capture_output=True,
                                                encoding="utf-8",
                                                errors="ignore").stdout.strip()
                except OSError:
                    # systemd-escape could not be run
                    machine_name=""
            if machine_name:
                self._machine=machine_name
            else:
                # should not happen
                if DEBUG: print(f"unexpected machine name in cgroup: {self.cgroup_path}")
                self._restart_command=False
                self._machine="UNKNOWN" # could use .host here perhaps
        return self._machine

    @property
    def restart_command(self):
        """Command to restart the unit.

If we dont know how to restart the unit, this will be False, else a string which
is a command to cause a restart (e.g., 'systemctl restart foo.service').
        """
        if self._restart_command is None:
            if self.unit_name.endswith(".service"):
                if self.unit_slice=="system.slice":
                    machine=self.machine
                    # looking up the machine sets _restart_command to False on failure
                    if self._restart_command is None:
                        if machine==".host":
                            self._restart_command=f"systemctl try-restart {self.unit_name}"
                        else:
                            # service inside systemd-nspawn container
                            self._restart_command=f"systemctl --machine={machine} try-restart {self.unit_name}"
                elif self.unit_slice=="user.slice":
                    user=self.user
                    machine=self.machine
                    # looking up the user or machine sets _restart_command to False on failure
                    if self._restart_command is None:
                        # user unit (the use of --machine <user>@.host requires systemd v248)
                        self._restart_command=f"# systemctl --user --machine={user}@{machine} try-restart {self.unit_name}"
                else:
                    if DEBUG: print(f"service in neither system nor user slice: {self.unit_slice} ({self.cgroup_path})")
                    self._restart_command=False
            else:
                # eg, PID 1 is inside init.scope, (a scope can contain
                # processes and slices not started via .service)
                if DEBUG: print(f"unit is not a service: {self.unit_name} ({self.cgroup_path})")
                self._restart_command=False
        return self._restart_command


# Script entry point: run main() and turn an interrupt from the user, or a
# closed output pipe (eg: piping into head), into exit status 3
if __name__ == '__main__':
    try:
        main()
    except (KeyboardInterrupt, BrokenPipeError):
        raise SystemExit(3)
