#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import json
import sys

import lib.args
import lib.base
import lib.human
import lib.lftest
import lib.txt
import lib.url
import lib.veeam
from lib.globals import STATE_CRIT, STATE_OK, STATE_UNKNOWN, STATE_WARN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051301'

DESCRIPTION = """Monitors Veeam Backup & Replication via PowerShell, checking for failed VMs and jobs,
jobs running longer than expected, and backup repository usage. Also reports backup
infrastructure component status and recent job results."""

DEFAULT_CRIT = 90
DEFAULT_FAILED_JOB_RUNS = 0
DEFAULT_FAILED_VM_LASTEST_STATES = 0
DEFAULT_INSECURE = True
DEFAULT_MAX_BACKUP_JOB_DURATION = 86400
DEFAULT_MAX_REPLICA_JOB_DURATION = 86400
DEFAULT_NO_PROXY = False
DEFAULT_TIMEOUT = 3
DEFAULT_URL = 'https://localhost:9398'
DEFAULT_USERNAME = 'Administrator'
DEFAULT_WARN = 80
DEFAULT_WARNING_VM_LASTEST_STATES = 0
DEFAULT_WARNINGS_JOB_RUNS = 0

def _normalize_keys(obj):
    """Recursively convert all dict keys to PascalCase.

    Veeam Enterprise Manager v13 ships JSON keys in camelCase
    ('backedUpVms') instead of the v12 PascalCase ('BackedUpVms')
    because the .NET migration switched the default JSON serializer.
    Upper-casing the first character is idempotent and keeps the rest
    of the plugin working against both v12 and v13 responses.
    """
    if isinstance(obj, dict):
        return {k[:1].upper() + k[1:]: _normalize_keys(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_normalize_keys(item) for item in obj]
    return obj


def parse_args():
    """Parse the command line arguments using argparse.

    Returns:
        argparse.Namespace: the parsed options, stored under their upper-case
        `dest` names (e.g. `args.URL`, `args.WARN`). Unknown arguments are
        ignored because `parse_known_args()` is used.

    Raises:
        SystemExit: raised by argparse on invalid input (e.g. a missing
        `--password`) and after printing `--help` or `--version`.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '-c',
        '--critical',
        help='CRIT threshold for backup repository usage as a percentage. '
        'Default: >= %(default)s',
        dest='CRIT',
        type=int,
        default=DEFAULT_CRIT,
    )

    parser.add_argument(
        '--failed-job-runs',
        help='Veeam threshold for `FailedJobRuns`. Default: > %(default)s.',
        dest='FAILED_JOB_RUNS',
        type=int,
        default=DEFAULT_FAILED_JOB_RUNS,
    )

    parser.add_argument(
        '--failed-vm-lastest-states',
        help='Veeam threshold for `FailedVmLastestStates`. Default: > %(default)s.',
        dest='FAILED_VM_LASTEST_STATES',
        type=int,
        default=DEFAULT_FAILED_VM_LASTEST_STATES,
    )

    # NOTE(review): a 'store_true' option whose default is DEFAULT_INSECURE
    # (True) is always set and cannot be switched off on the command line.
    # Kept as is for backward compatibility - confirm whether this is intended.
    parser.add_argument(
        '--insecure',
        help=lib.args.help('--insecure'),
        dest='INSECURE',
        action='store_true',
        default=DEFAULT_INSECURE,
    )

    parser.add_argument(
        '--max-backup-job-duration',
        help='Maximum allowed backup job duration in seconds. Default: > %(default)s.',
        dest='MAX_BACKUP_JOB_DURATION',
        type=int,
        default=DEFAULT_MAX_BACKUP_JOB_DURATION,
    )

    parser.add_argument(
        '--max-replica-job-duration',
        help='Maximum allowed replica job duration in seconds. Default: > %(default)s.',
        dest='MAX_REPLICA_JOB_DURATION',
        type=int,
        default=DEFAULT_MAX_REPLICA_JOB_DURATION,
    )

    parser.add_argument(
        '--no-proxy',
        help=lib.args.help('--no-proxy'),
        dest='NO_PROXY',
        action='store_true',
        default=DEFAULT_NO_PROXY,
    )

    parser.add_argument(
        '-p',
        '--password',
        help='Veeam REST API password.',
        dest='PASSWORD',
        required=True,
    )

    parser.add_argument(
        '--test',
        help=lib.args.help('--test'),
        dest='TEST',
        type=lib.args.csv,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '--url',
        help='Veeam REST API URL. Default: %(default)s',
        dest='URL',
        default=DEFAULT_URL,
    )

    # Not `required=True`: a required option never falls back to its default,
    # which would contradict the default advertised in the help text.
    parser.add_argument(
        '--username',
        help='Veeam REST API username. Default: %(default)s',
        dest='USERNAME',
        default=DEFAULT_USERNAME,
    )

    parser.add_argument(
        '-w',
        '--warning',
        help='WARN threshold for backup repository usage as a percentage. '
        'Default: >= %(default)s',
        dest='WARN',
        type=int,
        default=DEFAULT_WARN,
    )

    parser.add_argument(
        '--warnings-job-runs',
        help='Veeam threshold for `WarningsJobRuns`. Default: > %(default)s.',
        dest='WARNINGS_JOB_RUNS',
        type=int,
        default=DEFAULT_WARNINGS_JOB_RUNS,
    )

    parser.add_argument(
        '--warning-vm-lastest-states',
        help='Veeam threshold for `WarningVmLastestStates`. Default: > %(default)s.',
        dest='WARNING_VM_LASTEST_STATES',
        type=int,
        default=DEFAULT_WARNING_VM_LASTEST_STATES,
    )

    # unknown arguments are deliberately tolerated and dropped
    args, _ = parser.parse_known_args()
    return args


def main():
    """Fetch the Veeam summary reports, evaluate them and print the check result.

    Steps:

    1. Parse the command line. Any SystemExit raised by argparse (invalid
       input, but also `--help` and `--version`) is mapped to the exit code
       STATE_UNKNOWN.
    2. Without `--test`: obtain a session via `lib.veeam.get_token()` and
       fetch the four `/api/reports/summary/*` reports. With `--test`: load
       the JSON fixture returned by `lib.lftest.test()`.
    3. Normalize the keys of each report to PascalCase.
    4. Compare the job/VM counters and the maximum job durations with their
       thresholds, and each repository's usage with `--warning`/`--critical`.
    5. Hand message, property table, state and perfdata to `lib.base.oao()`.
    """

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    if args.TEST is None:
        success, result = lib.veeam.get_token(args)
        if not success:
            lib.base.cu(result)

        header = {
            'X-RestSvcSessionId': result['X-RestSvcSessionId'],
            'Accept': 'application/json',
        }
        result = {}

        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_overview.html?ver=110
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_statistics.html?ver=110
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_vms_overview.html?ver=110
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_repository.html?ver=110
        for report in ('overview', 'job_statistics', 'vms_overview', 'repository'):
            result[report] = lib.base.coe(
                lib.url.fetch_json(
                    args.URL + '/api/reports/summary/' + report,
                    header=header,
                    insecure=args.INSECURE,
                    no_proxy=args.NO_PROXY,
                    timeout=args.TIMEOUT,
                ),
            )
    else:
        # do not call the API, put in test data
        stdout, _stderr, _retc = lib.lftest.test(args.TEST)
        result = json.loads(stdout)

    # Normalize per-endpoint payloads only; the outer keys
    # (overview, job_statistics, ...) are set above by the plugin itself.
    for key in result:
        result[key] = _normalize_keys(result[key])

    overview = result['overview']
    job_stats = result['job_statistics']
    vms = result['vms_overview']

    # init some vars
    state = STATE_OK
    perfdata = ''
    facts = []  # message fragments, joined with ', ' at the end
    table_values = []

    def add_row(key, value):
        """Append one line to the key/value property table."""
        table_values.append({'key': key, 'value': value})

    def state_suffix(item_state):
        """Return the state label of `item_state`, prefixed with a blank."""
        return lib.base.state2str(item_state, prefix=' ')

    # analyze data - the table rows are appended in the order they are shown.
    # "Lastest" in the key names is the spelling used by Veeam (sic!).

    add_row('BackedUpVms', vms['BackedUpVms'])
    add_row('BackupServers', overview['BackupServers'])

    val = job_stats['FailedJobRuns']
    local_state = STATE_OK
    if val > args.FAILED_JOB_RUNS:
        local_state = STATE_CRIT
        state = lib.base.get_worst(state, local_state)
        facts.append(
            f'{val} {lib.txt.pluralize("Job", val)} failed{state_suffix(local_state)}'
        )
    add_row('FailedJobRuns', str(val) + state_suffix(local_state))

    val = overview['FailedVmLastestStates']
    local_state = STATE_OK
    if val > args.FAILED_VM_LASTEST_STATES:
        local_state = STATE_CRIT
        state = lib.base.get_worst(state, local_state)
        facts.append(
            f'{val} {lib.txt.pluralize("VM", val)} failed{state_suffix(local_state)}'
        )
    add_row('FailedVmLastestStates', str(val) + state_suffix(local_state))

    add_row('FullBackupPointsSize', lib.human.bytes2human(vms['FullBackupPointsSize']))
    add_row(
        'IncrementalBackupPointsSize',
        lib.human.bytes2human(vms['IncrementalBackupPointsSize']),
    )

    val = job_stats['MaxBackupJobDuration']
    local_state = STATE_OK
    if val > args.MAX_BACKUP_JOB_DURATION:
        # the longest backup job ran longer than --max-backup-job-duration
        local_state = STATE_WARN
        state = lib.base.get_worst(state, local_state)
        facts.append(
            f'"{job_stats["MaxDurationBackupJobName"]}"'
            f' ran for {lib.human.seconds2human(val)}'
            f'{state_suffix(local_state)}'
        )
    add_row('MaxBackupJobDuration', lib.human.seconds2human(val) + state_suffix(local_state))

    add_row('MaxDurationBackupJobName', job_stats['MaxDurationBackupJobName'])
    add_row('MaxDurationReplicaJobName', job_stats['MaxDurationReplicaJobName'])
    add_row('MaxJobDuration', lib.human.seconds2human(job_stats['MaxJobDuration']))

    val = job_stats['MaxReplicaJobDuration']
    local_state = STATE_OK
    if val > args.MAX_REPLICA_JOB_DURATION:
        # the longest replica job ran longer than --max-replica-job-duration
        local_state = STATE_WARN
        state = lib.base.get_worst(state, local_state)
        # job name quoted like the backup job name above
        facts.append(
            f'"{job_stats["MaxDurationReplicaJobName"]}"'
            f' ran for {lib.human.seconds2human(val)}'
            f'{state_suffix(local_state)}'
        )
    add_row('MaxReplicaJobDuration', lib.human.seconds2human(val) + state_suffix(local_state))

    add_row('ProtectedVms', vms['ProtectedVms'])
    add_row('ProxyServers', overview['ProxyServers'])
    add_row(
        'ReplicaRestorePointsSize',
        lib.human.bytes2human(vms['ReplicaRestorePointsSize']),
    )
    add_row('ReplicatedVms', vms['ReplicatedVms'])
    add_row('RepositoryServers', overview['RepositoryServers'])
    add_row('RestorePoints', vms['RestorePoints'])
    add_row('RunningJobs', job_stats['RunningJobs'])
    add_row('ScheduledBackupJobs', job_stats['ScheduledBackupJobs'])
    add_row('ScheduledJobs', job_stats['ScheduledJobs'])
    add_row('ScheduledReplicaJobs', job_stats['ScheduledReplicaJobs'])
    add_row('SourceVmsSize', lib.human.bytes2human(vms['SourceVmsSize']))
    add_row('SuccessBackupPercents', str(vms['SuccessBackupPercents']) + '%')
    add_row('SuccessfulJobRuns', job_stats['SuccessfulJobRuns'])
    add_row('SuccessfulVmLastestStates', overview['SuccessfulVmLastestStates'])
    add_row('TotalJobRuns', job_stats['TotalJobRuns'])

    val = job_stats['WarningsJobRuns']
    local_state = STATE_OK
    if val > args.WARNINGS_JOB_RUNS:
        local_state = STATE_WARN
        state = lib.base.get_worst(state, local_state)
        facts.append(
            f'{val} {lib.txt.pluralize("Job", val)} with warnings'
            f'{state_suffix(local_state)}'
        )
    add_row('WarningsJobRuns', str(val) + state_suffix(local_state))

    val = overview['WarningVmLastestStates']
    local_state = STATE_OK
    if val > args.WARNING_VM_LASTEST_STATES:
        local_state = STATE_WARN
        state = lib.base.get_worst(state, local_state)
        facts.append(
            f'{val} {lib.txt.pluralize("VM", val)} with warnings'
            f'{state_suffix(local_state)}'
        )
    add_row('WarningVmLastestStates', str(val) + state_suffix(local_state))

    # repository usage; the repository perfdata comes first in the output
    for repo in result['repository']['Periods']:
        capacity = float(repo['Capacity'])
        if capacity:
            val = round(float(repo['BackupSize']) / capacity * 100, 1)
        else:
            # A capacity of 0 would raise ZeroDivisionError and turn the
            # whole check into UNKNOWN; report 0% so that the remaining
            # metrics are still evaluated. The 0 capacity stays visible in
            # the message ("total: ...") and in the perfdata.
            val = 0.0
        local_state = lib.base.get_state(val, args.WARN, args.CRIT)
        state = lib.base.get_worst(state, local_state)
        facts.append(
            f'"{repo["Name"]}" {val}%'
            f'{state_suffix(local_state)} '
            f'used - '
            f'total: {lib.human.bytes2human(repo["Capacity"])}, '
            f'used: {lib.human.bytes2human(repo["BackupSize"])}, '
            f'free: {lib.human.bytes2human(repo["FreeSpace"])}'
        )
        perfdata += lib.base.get_perfdata(
            'Repo Usage ' + repo['Name'],
            val,
            uom='%',
            warn=args.WARN,
            crit=args.CRIT,
            _min=0,
            _max=100,
        )
        for field in ('Capacity', 'FreeSpace', 'BackupSize'):
            perfdata += lib.base.get_perfdata(
                'Repo ' + field + ' ' + repo['Name'],
                repo[field],
                uom='B',
                _min=0,
            )

    # remaining perfdata: (label, report the value is read from, extra keyword
    # arguments for get_perfdata); the order defines the perfdata order
    for label, report, extra in (
        ('BackedUpVms', vms, {}),
        ('BackupServers', overview, {}),
        ('FailedJobRuns', job_stats, {}),
        ('FailedVmLastestStates', overview, {}),
        ('FullBackupPointsSize', vms, {'uom': 'B'}),
        ('IncrementalBackupPointsSize', vms, {'uom': 'B'}),
        ('MaxBackupJobDuration', job_stats, {'uom': 's'}),
        ('MaxJobDuration', job_stats, {'uom': 's'}),
        ('MaxReplicaJobDuration', job_stats, {'uom': 's'}),
        ('ProtectedVms', vms, {}),
        ('ProxyServers', overview, {}),
        ('ReplicaRestorePointsSize', vms, {'uom': 'B'}),
        ('ReplicatedVms', vms, {}),
        ('RepositoryServers', overview, {}),
        ('RestorePoints', vms, {}),
        ('RunningJobs', job_stats, {}),
        ('ScheduledBackupJobs', job_stats, {}),
        ('ScheduledJobs', job_stats, {}),
        ('ScheduledReplicaJobs', job_stats, {}),
        ('SourceVmsSize', vms, {'uom': 'B'}),
        ('SuccessBackupPercents', vms, {'uom': '%', '_max': 100}),
        ('SuccessfulJobRuns', job_stats, {}),
        ('SuccessfulVmLastestStates', overview, {}),
        ('TotalJobRuns', job_stats, {}),
        ('WarningsJobRuns', job_stats, {}),
        ('WarningVmLastestStates', overview, {}),
    ):
        perfdata += lib.base.get_perfdata(label, report[label], _min=0, **extra)

    # build the message; without any finding and without repositories there
    # would be no first line at all, so fall back to a generic summary
    msg = (
        (', '.join(facts) if facts else 'Everything is ok.')
        + '\n\n'
        + lib.base.get_table(
            table_values,
            ['key', 'value'],
            header=['Key', 'Value'],
        )
    )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# Script entry point: run the check only when executed directly, not on import.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Any exception that main() did not handle itself ends up here and is
        # handed to lib.base.cu(), called without a message.
        lib.base.cu()
