#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.db_sqlite
import lib.human
from lib.globals import STATE_OK, STATE_UNKNOWN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051101'

# Shown by argparse as the program description (see `parse_args()`).
DESCRIPTION = """Checks the rate of slow queries in MySQL/MariaDB (`Slow_queries` /
`Questions`). A high ratio means many queries are exceeding `long_query_time` and
likely need optimisation. Alerts when the ratio crosses `--warning` / `--critical`.
Also reports whether the slow query log is enabled and nudges the admin to lower
`long_query_time` to 1 - 3 seconds when it is set above 10 (MySQL/MariaDB default
is 10s, which only catches the worst outliers)."""

# Defaults for `--defaults-file` and `--defaults-group`.
DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
# Match mysqltuner's 5% cut-off, but expose a higher --critical so admins can keep
# WARN for early notice and let the alert escalate when the ratio really runs hot.
# Both thresholds are percentages of slow queries. They are kept as strings because
# `--warning` / `--critical` accept Nagios ranges and are evaluated as such in
# `main()`.
DEFAULT_WARN = '5'
DEFAULT_CRIT = '10'
# Default for `--timeout`, in seconds.
DEFAULT_TIMEOUT = 3

# Name of the local SQLite cache that `main()` hands to
# `lib.db_sqlite.per_second_deltas()` to compute the slow-queries-per-second rate.
SQLITE_DB = 'linuxfabrik-monitoring-plugins-mysql-slow-queries.db'


def parse_args():
    """Define the plugin's command line interface and parse the arguments.

    Returns the `argparse.Namespace` carrying the upper-case destinations
    `ALWAYS_OK`, `CRITICAL`, `DEFAULTS_FILE`, `DEFAULTS_GROUP`, `TIMEOUT` and
    `WARNING`. Arguments the parser does not know are accepted and dropped,
    because `parse_known_args()` is used instead of `parse_args()`.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)
    range_hint = ' Supports Nagios ranges. Default: %(default)s'

    # One entry per option: (flags, add_argument keyword arguments). The order
    # of this table is the order in which the options are registered.
    option_table = (
        (
            ('-V', '--version'),
            {
                'action': 'version',
                'version': f'%(prog)s: v{__version__} by {__author__}',
            },
        ),
        (
            ('--always-ok',),
            {
                'help': lib.args.help('--always-ok'),
                'dest': 'ALWAYS_OK',
                'action': 'store_true',
                'default': False,
            },
        ),
        (
            ('-c', '--critical'),
            {
                'help': lib.args.help('--critical') + range_hint,
                'dest': 'CRITICAL',
                'default': DEFAULT_CRIT,
            },
        ),
        (
            ('--defaults-file',),
            {
                'help': (
                    'MySQL/MariaDB cnf file to read user, host and password from. '
                    'Example: `--defaults-file=/var/spool/icinga2/.my.cnf`. '
                    'Default: %(default)s'
                ),
                'dest': 'DEFAULTS_FILE',
                'default': DEFAULT_DEFAULTS_FILE,
            },
        ),
        (
            ('--defaults-group',),
            {
                'help': lib.args.help('--defaults-group') + ' Default: %(default)s',
                'dest': 'DEFAULTS_GROUP',
                'default': DEFAULT_DEFAULTS_GROUP,
            },
        ),
        (
            ('--timeout',),
            {
                'help': lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
                'dest': 'TIMEOUT',
                'type': int,
                'default': DEFAULT_TIMEOUT,
            },
        ),
        (
            ('-w', '--warning'),
            {
                'help': lib.args.help('--warning') + range_hint,
                'dest': 'WARNING',
                'default': DEFAULT_WARN,
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    # Only the namespace is of interest; unrecognised arguments are discarded.
    known, _unknown = cli.parse_known_args()
    return known


def get_vars(conn):
    """Read the server variables `long_query_time` and `slow_query_log`.

    `conn` is the open connection obtained via `lib.db_mysql.connect()`. The
    outcome of `lib.db_mysql.select()` is passed through `lib.base.coe()` and
    returned; `main()` feeds it to `lib.db_mysql.lod2dict()` (presumably a list
    of row dicts - confirm in lib.db_mysql).
    """
    # Deliberately no generic "get all variables" helper: only the two values
    # this check evaluates are fetched.
    # GLOBAL is required, because a plain SHOW VARIABLES reports the values in
    # effect for the current connection to MariaDB instead.
    query = """
        show global variables
        where variable_name like 'long_query_time'
            or variable_name like 'slow_query_log'
            ;
          """
    outcome = lib.db_mysql.select(conn, query)
    return lib.base.coe(outcome)


def get_status(conn):
    """Read the global status counters `Questions` and `Slow_queries`.

    `conn` is the open connection obtained via `lib.db_mysql.connect()`. The
    outcome of `lib.db_mysql.select()` is passed through `lib.base.coe()` and
    returned; `main()` feeds it to `lib.db_mysql.lod2dict()` (presumably a list
    of row dicts - confirm in lib.db_mysql).
    """
    query = """
        show global status
        where variable_name like 'Questions'
            or variable_name like 'Slow_queries'
            ;
          """
    outcome = lib.db_mysql.select(conn, query)
    return lib.base.coe(outcome)


def main():
    """Run the check: query the server, rate the slow-query ratio and report.

    Steps, in order:

    1. Parse the command line; an argparse exit is turned into UNKNOWN.
    2. Connect, verify privileges, read `long_query_time`, `slow_query_log`,
       `Questions` and `Slow_queries`, then close the connection.
    3. Compute `Slow_queries / Questions` in percent and rate it against
       `--warning` / `--critical` (Nagios ranges).
    4. Assemble the message (facts first, then an optional bulleted
       `Recommendations:` block) and the perfdata, then hand everything to
       `lib.base.oao()`.
    """
    # The evaluation follows mysqltuner.pl:mysql_stats(), section
    # "Slow queries", and was verified to be in sync with MySQLTuner.

    # argparse terminates the process itself (e.g. on invalid input); map any
    # such exit to the plugin's UNKNOWN state.
    try:
        opts = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # --- collect ---------------------------------------------------------
    db = lib.base.coe(
        lib.db_mysql.connect(
            {
                'defaults_file': opts.DEFAULTS_FILE,
                'defaults_group': opts.DEFAULTS_GROUP,
                'timeout': opts.TIMEOUT,
            }
        )
    )
    lib.base.coe(lib.db_mysql.check_privileges(db))
    variables = lib.db_mysql.lod2dict(get_vars(db))
    counters = lib.db_mysql.lod2dict(get_status(db))
    # Everything needed is in memory now, so release the connection early.
    lib.db_mysql.close(db)

    total_queries = int(counters['Questions'])
    slow_count = int(counters['Slow_queries'])
    threshold_s = float(variables['long_query_time'])
    log_switch = variables.get('slow_query_log')

    # --- evaluate --------------------------------------------------------
    # An idle, freshly booted server may legitimately report `Questions = 0`.
    # The division is guarded right here instead of patching the counter, so
    # the totals shown to the admin stay truthful.
    slow_pct = (
        0.0 if total_queries == 0 else round(slow_count / total_queries * 100, 2)
    )
    state = lib.base.get_state(
        slow_pct, opts.WARNING, opts.CRITICAL, _operator='range'
    )
    alerting = state != STATE_OK

    # Every recommendation, whichever path produced it, is gathered in this
    # one list and rendered once at the end as a bulleted block.
    advice = []
    if alerting:
        advice.append(
            'Investigate the slow query log and optimise the'
            f' {slow_count} slow queries (out of {total_queries} total)'
        )

    # --- build the message -----------------------------------------------
    slow_human = lib.human.number2human(slow_count)
    total_human = lib.human.number2human(total_queries)
    state_suffix = lib.base.state2str(state, prefix=' ')
    findings = [
        f'Slow queries: {slow_pct}% ({slow_human} slow / {total_human} total)'
        f'{state_suffix}',
        f'`long_query_time` = {threshold_s}s',
    ]
    if threshold_s > 10:
        advice.append(
            f'Lower `long_query_time` (currently {threshold_s}s) to 1 - 3'
            ' seconds for meaningful slow-query coverage; at the MySQL/MariaDB'
            ' default of 10s only the worst outliers are captured'
        )

    # The log switch is only mentioned when it is a plain ON/OFF; enabling it
    # is only recommended while the ratio is actually alerting.
    log_facts = {
        'OFF': '`slow_query_log` is `OFF`',
        'ON': '`slow_query_log` is `ON`',
    }
    if log_switch in log_facts:
        findings.append(log_facts[log_switch])
    if log_switch == 'OFF' and alerting:
        advice.append('Enable `slow_query_log` to troubleshoot the slow queries')

    headline = '. '.join(findings) + '.'
    if not alerting:
        # Lead with the verdict on OK so the admin sees it first; the facts
        # follow.
        headline = 'Everything is ok. ' + headline
    blocks = [headline]
    if advice:
        bullets = '\n'.join(f'* {item}' for item in advice)
        blocks.append('Recommendations:\n' + bullets)
    message = '\n\n'.join(blocks)

    # --- perfdata --------------------------------------------------------
    perfdata_parts = [
        lib.base.get_perfdata(
            'mysql_long_query_time',
            threshold_s,
            uom='s',
            _min=0,
        ),
        lib.base.get_perfdata(
            'mysql_pct_slow_queries',
            slow_pct,
            uom='%',
            warn=opts.WARNING,
            crit=opts.CRITICAL,
            _min=0,
            _max=100,
        ),
    ]

    # Per CONTRIBUTING, `Slow_queries` is emitted as a per-second delta
    # (computed in-plugin against a local SQLite cache) rather than as a
    # cumulative `c` counter, which would force Grafana to apply
    # non_negative_difference() in every panel. `Questions` per second is
    # intentionally left out; mysql-traffic already provides it. The helper
    # auto-rebuilds the cache table if an upgrade from a previous schema is
    # detected.
    rates = lib.db_sqlite.per_second_deltas(
        SQLITE_DB,
        'mysql-slow-queries',
        {'slow_queries': slow_count},
    )
    # `None` means no rate is available for this run (presumably because no
    # earlier sample is cached yet - confirm in lib.db_sqlite); the metric is
    # then simply omitted.
    if rates is not None:
        perfdata_parts.append(
            lib.base.get_perfdata(
                'mysql_slow_queries_per_second',
                round(rates['slow_queries'], 2),
                _min=0,
            )
        )

    # over and out
    lib.base.oao(
        message,
        state,
        ''.join(perfdata_parts),
        always_ok=opts.ALWAYS_OK,
    )


if __name__ == '__main__':
    # Run the check only when the file is executed as a script. Any exception
    # that escapes `main()` is handed to `lib.base.cu()`, the library's handler
    # for unexpected errors (presumably prints the traceback and exits with
    # UNKNOWN - confirm in lib.base).
    try:
        main()
    except Exception:
        lib.base.cu()
