#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.db_sqlite
import lib.human
from lib.globals import STATE_OK, STATE_UNKNOWN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051101'

DESCRIPTION = """Checks how often MySQL/MariaDB sorts have to spill from memory to a temporary
merge-sort file (`Sort_merge_passes` / (`Sort_scan` + `Sort_range`)). A high ratio
means `sort_buffer_size` and/or `read_rnd_buffer_size` are too small for the
workload's typical sort. Alerts when the ratio crosses `--warning` / `--critical`."""

DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
# Match mysqltuner's 10% cut-off, but expose a higher --critical so admins can
# keep WARN for early notice and let the alert escalate when the ratio really
# runs hot.
DEFAULT_WARN = '10'
DEFAULT_CRIT = '20'
DEFAULT_TIMEOUT = 3

SQLITE_DB = 'linuxfabrik-monitoring-plugins-mysql-sorts.db'


def parse_args():
    """Parse command line arguments using argparse."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '-c',
        '--critical',
        help=lib.args.help('--critical')
        + ' Supports Nagios ranges. Default: %(default)s',
        dest='CRITICAL',
        default=DEFAULT_CRIT,
    )

    parser.add_argument(
        '--defaults-file',
        help='MySQL/MariaDB cnf file to read user, host and password from. '
        'Example: `--defaults-file=/var/spool/icinga2/.my.cnf`. '
        'Default: %(default)s',
        dest='DEFAULTS_FILE',
        default=DEFAULT_DEFAULTS_FILE,
    )

    parser.add_argument(
        '--defaults-group',
        help=lib.args.help('--defaults-group') + ' Default: %(default)s',
        dest='DEFAULTS_GROUP',
        default=DEFAULT_DEFAULTS_GROUP,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '-w',
        '--warning',
        help=lib.args.help('--warning')
        + ' Supports Nagios ranges. Default: %(default)s',
        dest='WARNING',
        default=DEFAULT_WARN,
    )

    args, _ = parser.parse_known_args()
    return args


def get_vars(conn):
    # Do not implement `get_all_vars()`, just fetch the ones we need for this check.
    # Without the GLOBAL modifier, SHOW VARIABLES displays the values that are used for
    # the current connection to MariaDB.
    sql = """
        show global variables
        where variable_name like 'read_rnd_buffer_size'
            or variable_name like 'sort_buffer_size'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def get_status(conn):
    sql = """
        show global status
        where variable_name like 'Sort_merge_passes'
            or variable_name like 'Sort_range'
            or variable_name like 'Sort_scan'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def main():
    """The main function. This is where the magic happens."""

    # logic taken from mysqltuner.pl:mysql_stats(), section "Sorting",
    # verified in sync with MySQLTuner

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    mysql_connection = {
        'defaults_file': args.DEFAULTS_FILE,
        'defaults_group': args.DEFAULTS_GROUP,
        'timeout': args.TIMEOUT,
    }
    conn = lib.base.coe(lib.db_mysql.connect(mysql_connection))
    lib.base.coe(lib.db_mysql.check_privileges(conn))

    myvar = lib.db_mysql.lod2dict(get_vars(conn))
    mystat = lib.db_mysql.lod2dict(get_status(conn))
    lib.db_mysql.close(conn)

    # init some vars
    state = STATE_OK
    sections = []
    facts = []
    # All recommendations from all WARN/CRIT paths land here and render once at
    # the end as a `Recommendations:\n* ...` bulleted block, regardless of
    # which combinations of WARN paths fire.
    recommendations = []
    perfdata = ''
    sort_scan = int(mystat['Sort_scan'])
    sort_range = int(mystat['Sort_range'])
    sort_merge_passes = int(mystat['Sort_merge_passes'])
    sort_buffer_size = int(myvar['sort_buffer_size'])
    read_rnd_buffer_size = int(myvar['read_rnd_buffer_size'])

    # analyze data
    total_sorts = sort_scan + sort_range
    # Guard the division locally; an idle server can legitimately have zero
    # sorts so far.
    if total_sorts > 0:
        pct_temp_sort_table = round(sort_merge_passes / total_sorts * 100, 1)
    else:
        pct_temp_sort_table = 0.0

    if total_sorts > 0:
        state = lib.base.get_state(
            pct_temp_sort_table,
            args.WARNING,
            args.CRITICAL,
            _operator='range',
        )
        if state != STATE_OK:
            recommendations.append(
                f'Raise `sort_buffer_size` (currently'
                f' {lib.human.bytes2human(sort_buffer_size)}) to 2 - 8 MiB'
                f' and/or `read_rnd_buffer_size` (currently'
                f' {lib.human.bytes2human(read_rnd_buffer_size)}) to 1 - 4'
                f' MiB so fewer sorts have to spill to a temporary'
                f' merge-sort file. Note: both buffers are allocated per'
                f' session, so raising them globally raises per-thread'
                f' memory. For one-off large sorts, tune per session'
                f' instead (`SET SESSION sort_buffer_size = ...`)'
            )

    # build the message
    if total_sorts == 0:
        facts.append(
            f'No sorts yet ({lib.human.number2human(sort_scan)} full-scan,'
            f' {lib.human.number2human(sort_range)} range)'
        )
    else:
        facts.append(
            f'Sorts requiring a temporary merge-sort file:'
            f' {pct_temp_sort_table}%'
            f' ({lib.human.number2human(sort_merge_passes)} temp sorts'
            f' / {lib.human.number2human(total_sorts)} sorts)'
            f'{lib.base.state2str(state, prefix=" ")}'
        )
        facts.append(
            f'Components: `Sort_scan` ='
            f' {lib.human.number2human(sort_scan)},'
            f' `Sort_range` = {lib.human.number2human(sort_range)}'
        )

    facts_text = '. '.join(facts) + '.'
    if state == STATE_OK:
        # "Everything is ok." leads the OK message so the admin sees the
        # verdict first; the facts follow.
        sections.append('Everything is ok. ' + facts_text)
    else:
        sections.append(facts_text)
    if recommendations:
        sections.append(
            'Recommendations:\n' + '\n'.join(f'* {r}' for r in recommendations)
        )
    msg = '\n\n'.join(sections)

    perfdata += lib.base.get_perfdata(
        'mysql_sort_buffer_size',
        sort_buffer_size,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_read_rnd_buffer_size',
        read_rnd_buffer_size,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_pct_temp_sort_table',
        pct_temp_sort_table,
        uom='%',
        warn=args.WARNING,
        crit=args.CRITICAL,
        _min=0,
        _max=100,
    )

    # Per-CONTRIBUTING: emit Sort_scan / Sort_range / Sort_merge_passes (and the
    # derived total) as per-second deltas (computed in-plugin against a local
    # SQLite cache) instead of cumulative `c` counters that force Grafana to do
    # non_negative_difference() per panel.
    rates = lib.db_sqlite.per_second_deltas(
        SQLITE_DB,
        'mysql-sorts',
        {
            'sort_scan': sort_scan,
            'sort_range': sort_range,
            'sort_merge_passes': sort_merge_passes,
        },
    )
    if rates is not None:
        perfdata += lib.base.get_perfdata(
            'mysql_sort_scan_per_second',
            round(rates['sort_scan'], 2),
            _min=0,
        )
        perfdata += lib.base.get_perfdata(
            'mysql_sort_range_per_second',
            round(rates['sort_range'], 2),
            _min=0,
        )
        perfdata += lib.base.get_perfdata(
            'mysql_sort_merge_passes_per_second',
            round(rates['sort_merge_passes'], 2),
            _min=0,
        )
        perfdata += lib.base.get_perfdata(
            'mysql_total_sorts_per_second',
            round(rates['sort_scan'] + rates['sort_range'], 2),
            _min=0,
        )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
