#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.db_sqlite
import lib.human
from lib.globals import STATE_OK, STATE_UNKNOWN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051301'

DESCRIPTION = """Checks the percentage of MySQL/MariaDB temporary tables that had to spill to disk
(`Created_tmp_disk_tables` divided by `Created_tmp_tables`). A high percentage means queries are
materialising temporary tables larger than the smaller of `tmp_table_size` and `max_heap_table_size`,
and the server falls back to a disk-based temporary table. Alerts when the percentage crosses
`--warning` / `--critical`. Recommendations depend on whether the effective temp-table cap is
already large (`>= 256 MiB`, mysqltuner's cut-off); in that case raising the cap further does not
help and the underlying queries (typically `SELECT DISTINCT` without `LIMIT`) need attention."""

DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
# mysqltuner alerts when > 25% of temp tables spill to disk. We keep
# that as WARN (Nagios range `25` = alert if value > 25) and add a CRIT
# escalation at > 50% so the plugin state actually goes red when the workload
# is severely temp-table-bound.
DEFAULT_WARN = '25'
DEFAULT_CRIT = '50'
DEFAULT_TIMEOUT = 3

# mysqltuner's cut-off: above this, raising `tmp_table_size` /
# `max_heap_table_size` further is not the right action.
TMP_TABLE_LARGE_THRESHOLD = 256 * 1024 * 1024

SQLITE_DB = 'linuxfabrik-monitoring-plugins-mysql-temp-tables.db'


def parse_args():
    """Parse command line arguments using argparse."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '-c',
        '--critical',
        help=lib.args.help('--critical')
        + ' Supports Nagios ranges. Default: %(default)s',
        dest='CRIT',
        default=DEFAULT_CRIT,
    )

    parser.add_argument(
        '--defaults-file',
        help='MySQL/MariaDB cnf file to read user, host and password from. '
        'Example: `--defaults-file=/var/spool/icinga2/.my.cnf`. '
        'Default: %(default)s',
        dest='DEFAULTS_FILE',
        default=DEFAULT_DEFAULTS_FILE,
    )

    parser.add_argument(
        '--defaults-group',
        help=lib.args.help('--defaults-group') + ' Default: %(default)s',
        dest='DEFAULTS_GROUP',
        default=DEFAULT_DEFAULTS_GROUP,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '-w',
        '--warning',
        help=lib.args.help('--warning')
        + ' Supports Nagios ranges. Default: %(default)s',
        dest='WARN',
        default=DEFAULT_WARN,
    )

    args, _ = parser.parse_known_args()
    return args


def get_vars(conn):
    sql = """
        show global variables
        where variable_name like 'max_heap_table_size'
            or variable_name like 'tmp_table_size'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def get_status(conn):
    sql = """
        show global status
        where variable_name like 'Created_tmp_disk_tables'
            or variable_name like 'Created_tmp_tables'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def main():
    """The main function. This is where the magic happens."""

    # logic taken from mysqltuner.pl:mysql_stats(), section "Temporary tables",
    # verified in sync with MySQLTuner (the > 25% threshold, the
    # 256 MiB cut-off between "raise the cap" and "the cap is already large
    # enough" recommendation paths, and the `max_tmp_table_size` formula are
    # unchanged upstream since the original port). Our --warning default
    # matches mysqltuner; --critical 50 is a Linuxfabrik addition so the
    # plugin state actually goes red on severe temp-table churn.

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    mysql_connection = {
        'defaults_file': args.DEFAULTS_FILE,
        'defaults_group': args.DEFAULTS_GROUP,
        'timeout': args.TIMEOUT,
    }
    conn = lib.base.coe(lib.db_mysql.connect(mysql_connection))
    lib.base.coe(lib.db_mysql.check_privileges(conn))

    myvar = lib.db_mysql.lod2dict(get_vars(conn))
    mystat = lib.db_mysql.lod2dict(get_status(conn))
    lib.db_mysql.close(conn)

    # init some vars
    state = STATE_OK
    sections = []
    facts = []
    # All recommendations from all WARN/CRIT paths land here and render once
    # at the end as a `Recommendations:\n* ...` bulleted block.
    recommendations = []
    perfdata = ''

    created_tmp_tables = int(mystat['Created_tmp_tables'])
    created_tmp_disk_tables = int(mystat['Created_tmp_disk_tables'])
    tmp_table_size = int(myvar['tmp_table_size'])
    max_heap_table_size = int(myvar['max_heap_table_size'])
    # The effective in-memory limit for an internal temp table is the
    # smaller of `tmp_table_size` and `max_heap_table_size`; once a single
    # temp table exceeds it, MySQL/MariaDB spills the table to disk
    # (MySQL Reference Manual on `tmp_table_size`; MariaDB Knowledge Base
    # on `tmp_table_size`). mysqltuner's `max_tmp_table_size` calculation
    # in `mysqltuner.pl:mysql_stats()` mirrors that with `MIN(...)`.
    effective_cap = min(tmp_table_size, max_heap_table_size)

    # analyze data
    # On a freshly booted server with no traffic `Created_tmp_tables` can be
    # zero; mysqltuner pins the percentage to 0 in that case to avoid the
    # false-positive low-rate alert.
    pct_temp_disk = (
        round(created_tmp_disk_tables / created_tmp_tables * 100, 1)
        if created_tmp_tables > 0
        else 0.0
    )

    # The `Created_tmp_tables > 0` guard mirrors mysqltuner: on a server that
    # has not yet materialised a single temporary table the percentage is
    # meaningless.
    if created_tmp_tables > 0:
        state = lib.base.get_state(
            pct_temp_disk,
            args.WARN,
            args.CRIT,
            _operator='range',
        )
        if state != STATE_OK:
            if effective_cap < TMP_TABLE_LARGE_THRESHOLD:
                recommendations.append(
                    f'Raise both `tmp_table_size` (currently '
                    f'{lib.human.bytes2human(tmp_table_size)}) and '
                    f'`max_heap_table_size` (currently '
                    f'{lib.human.bytes2human(max_heap_table_size)}) toward '
                    f'{lib.human.bytes2human(TMP_TABLE_LARGE_THRESHOLD)} '
                    f'(mysqltuner cut-off); keep them equal because the '
                    f'effective per-table cap is the smaller of the two'
                )
                recommendations.append(
                    'These limits apply per implicit temp table, not as a '
                    'shared pool. Size them just above the largest single '
                    'temp table your workload actually generates; going '
                    'higher does not help. Plan RAM headroom for that limit '
                    'multiplied by the number of concurrent sessions '
                    'building temp tables, so the server does not run out '
                    'of memory under load. Use the slow log or '
                    '`sys.x$statements_with_temp_tables` in '
                    '`performance_schema` to find typical temp-table sizes'
                )
            else:
                recommendations.append(
                    f'The effective temp-table cap '
                    f'({lib.human.bytes2human(effective_cap)}) is already at '
                    f'or above the {lib.human.bytes2human(TMP_TABLE_LARGE_THRESHOLD)} '
                    f'mysqltuner cut-off. Raising it further will not help; '
                    f'reduce the working set of the queries instead'
                )
            recommendations.append(
                'Audit `SELECT DISTINCT` and `GROUP BY` queries that run '
                'without a `LIMIT` clause; those are the classic source of '
                'oversized temporary tables'
            )

    # build the message
    if created_tmp_tables > 0:
        facts.append(
            f'Temporary tables created on disk: {pct_temp_disk}% '
            f'({lib.human.number2human(created_tmp_disk_tables)} on disk '
            f'/ {lib.human.number2human(created_tmp_tables)} total)'
            f'{lib.base.state2str(state, prefix=" ")}'
        )
    else:
        facts.append(
            'no temporary tables created yet (`Created_tmp_tables` = 0); '
            'percentage check skipped'
        )
    facts.append(
        f'`tmp_table_size` = {lib.human.bytes2human(tmp_table_size)}, '
        f'`max_heap_table_size` = '
        f'{lib.human.bytes2human(max_heap_table_size)}'
    )
    facts_text = '. '.join(facts) + '.'
    if state == STATE_OK:
        sections.append('Everything is ok. ' + facts_text)
    else:
        sections.append(facts_text)
    if recommendations:
        sections.append(
            'Recommendations:\n' + '\n'.join(f'* {r}' for r in recommendations)
        )
    msg = '\n\n'.join(sections)

    perfdata += lib.base.get_perfdata(
        'mysql_max_heap_table_size',
        max_heap_table_size,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_tmp_table_size',
        tmp_table_size,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_tmp_table_size',
        effective_cap,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_pct_temp_disk',
        pct_temp_disk,
        uom='%',
        warn=args.WARN,
        crit=args.CRIT,
        _min=0,
        _max=100,
    )

    # Per-CONTRIBUTING: emit the cumulative counters as per-second deltas
    # (computed in-plugin against a local SQLite cache) instead of `uom='c'`
    # continuous counters that force Grafana to do non_negative_difference()
    # per panel.
    rates = lib.db_sqlite.per_second_deltas(
        SQLITE_DB,
        'mysql-temp-tables',
        {
            'created_tmp_disk_tables': created_tmp_disk_tables,
            'created_tmp_tables': created_tmp_tables,
        },
    )
    if rates is not None:
        perfdata += lib.base.get_perfdata(
            'mysql_created_tmp_disk_tables_per_second',
            round(rates['created_tmp_disk_tables'], 2),
            _min=0,
        )
        perfdata += lib.base.get_perfdata(
            'mysql_created_tmp_tables_per_second',
            round(rates['created_tmp_tables'], 2),
            _min=0,
        )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
