#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.db_sqlite
import lib.human
from lib.globals import STATE_OK, STATE_UNKNOWN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051101'

DESCRIPTION = """Checks the hit rate of the MySQL/MariaDB open table cache (`Table_open_cache_hits`
divided by (`Table_open_cache_hits` + `Table_open_cache_misses`); on servers without
`Table_open_cache_hits` it falls back to `Open_tables` / `Opened_tables`). A low hit rate means
`table_open_cache` is too small for the workload and threads have to keep reopening tables.
Alerts when the rate drops below `--warning` / `--critical`. Also reminds the admin that
`open_files_limit` has to stay above `table_open_cache`."""

DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
# mysqltuner alerts at < 20% hit rate; we keep that as WARN and add a
# CRIT escalation at < 10% so the plugin state actually goes red when the
# cache is severely undersized. Nagios range form `N:` = "OK range is N to
# infinity"; values below N trigger the threshold.
DEFAULT_WARN = '20:'
DEFAULT_CRIT = '10:'
DEFAULT_TIMEOUT = 3

SQLITE_DB = 'linuxfabrik-monitoring-plugins-mysql-table-cache.db'


def parse_args():
    """Parse command line arguments using argparse."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '-c',
        '--critical',
        help=lib.args.help('--critical')
        + ' Supports Nagios ranges. Default: %(default)s',
        dest='CRITICAL',
        default=DEFAULT_CRIT,
    )

    parser.add_argument(
        '--defaults-file',
        help='MySQL/MariaDB cnf file to read user, host and password from. '
        'Example: `--defaults-file=/var/spool/icinga2/.my.cnf`. '
        'Default: %(default)s',
        dest='DEFAULTS_FILE',
        default=DEFAULT_DEFAULTS_FILE,
    )

    parser.add_argument(
        '--defaults-group',
        help=lib.args.help('--defaults-group') + ' Default: %(default)s',
        dest='DEFAULTS_GROUP',
        default=DEFAULT_DEFAULTS_GROUP,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '-w',
        '--warning',
        help=lib.args.help('--warning')
        + ' Supports Nagios ranges. Default: %(default)s',
        dest='WARNING',
        default=DEFAULT_WARN,
    )

    args, _ = parser.parse_known_args()
    return args


def get_vars(conn):
    # Do not implement `get_all_vars()`, just fetch the ones we need for this check.
    # Without the GLOBAL modifier, SHOW VARIABLES displays the values that are used for
    # the current connection to MariaDB.
    sql = """
        show global variables
        where variable_name like 'open_files_limit'
            or variable_name like 'table_open_cache'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def get_status(conn):
    sql = """
        show global status
        where variable_name like 'Open_tables'
            or variable_name like 'Opened_tables'
            or variable_name like 'Table_open_cache_hits'
            or variable_name like 'Table_open_cache_misses'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def main():
    """The main function. This is where the magic happens."""

    # logic taken from mysqltuner.pl:mysql_stats(), section "Table cache",
    # verified in sync with MySQLTuner (the hit-rate formula, the
    # `Opened_tables > 0` calc guard, the `Open_tables > 0` check guard,
    # and the "increase `table_open_cache` / keep `open_files_limit` above
    # `table_open_cache`" recommendation are unchanged upstream since the
    # original port). Our --warning default 20% matches mysqltuner; --critical
    # 10% is a Linuxfabrik addition so the plugin state escalates to CRIT
    # before the cache is essentially useless.

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    mysql_connection = {
        'defaults_file': args.DEFAULTS_FILE,
        'defaults_group': args.DEFAULTS_GROUP,
        'timeout': args.TIMEOUT,
    }
    conn = lib.base.coe(lib.db_mysql.connect(mysql_connection))
    lib.base.coe(lib.db_mysql.check_privileges(conn))

    myvar = lib.db_mysql.lod2dict(get_vars(conn))
    mystat = lib.db_mysql.lod2dict(get_status(conn))
    lib.db_mysql.close(conn)

    # init some vars
    state = STATE_OK
    sections = []
    facts = []
    # All recommendations from all WARN/CRIT paths land here and render once
    # at the end as a `Recommendations:\n* ...` bulleted block, regardless of
    # which combinations of WARN/CRIT paths fire.
    recommendations = []
    perfdata = ''

    open_tables = int(mystat['Open_tables'])
    opened_tables = int(mystat['Opened_tables'])
    table_open_cache = int(myvar['table_open_cache'])
    open_files_limit = int(myvar['open_files_limit'])
    cache_hits_raw = mystat.get('Table_open_cache_hits')
    has_modern_counters = cache_hits_raw is not None
    cache_hits = int(cache_hits_raw) if has_modern_counters else 0
    cache_misses = int(mystat.get('Table_open_cache_misses') or 0)

    # analyze data
    # On a freshly booted server with no traffic, `Opened_tables = 0` is the
    # honest "no data yet" answer; mysqltuner pins the rate to 100 in that
    # case to avoid the false-positive low-hit-rate alert. We do the same.
    if opened_tables == 0:
        hit_rate = 100.0
        hits_for_msg = 0
        requests_for_msg = 0
    elif has_modern_counters:
        total_requests = cache_hits + cache_misses
        hit_rate = (
            round(cache_hits / total_requests * 100, 1) if total_requests > 0 else 100.0
        )
        hits_for_msg = cache_hits
        requests_for_msg = total_requests
    else:
        hit_rate = round(open_tables / opened_tables * 100, 1)
        hits_for_msg = open_tables
        requests_for_msg = opened_tables

    # The `Open_tables > 0` guard mirrors mysqltuner: on a server that has
    # not yet opened a single table the hit rate is meaningless. On modern
    # MySQL/MariaDB this is essentially always true outside of CI fixtures.
    if open_tables > 0:
        state = lib.base.get_state(
            hit_rate,
            args.WARNING,
            args.CRITICAL,
            _operator='range',
        )
        if state != STATE_OK:
            recommendations.append(
                f'Raise `table_open_cache` (currently {table_open_cache}) '
                f'gradually; verify that `open_files_limit` '
                f'({open_files_limit}) stays above it. '
                f'On MyISAM-heavy workloads `table_open_cache` is the '
                f'classic scalability bottleneck (`InnoDB` is not affected; '
                f'see https://bugs.mysql.com/bug.php?id=49177, fixed in '
                f'MySQL 5.7.9+)'
            )

    # build the message
    if open_tables > 0:
        facts.append(
            f'Table cache hit rate: {hit_rate}%'
            f' ({lib.human.number2human(hits_for_msg)} hits'
            f' / {lib.human.number2human(requests_for_msg)} requests)'
            f'{lib.base.state2str(state, prefix=" ")}'
        )
    else:
        facts.append('no table opens yet (`Open_tables` = 0); hit-rate check skipped')
    facts.append(
        f'`table_open_cache` = {table_open_cache},'
        f' `open_files_limit` = {open_files_limit}'
    )
    facts_text = '. '.join(facts) + '.'
    if state == STATE_OK:
        sections.append('Everything is ok. ' + facts_text)
    else:
        sections.append(facts_text)
    if recommendations:
        sections.append(
            'Recommendations:\n' + '\n'.join(f'* {r}' for r in recommendations)
        )
    msg = '\n\n'.join(sections)

    perfdata += lib.base.get_perfdata(
        'mysql_open_files_limit',
        open_files_limit,
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_table_open_cache',
        table_open_cache,
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_open_tables',
        open_tables,
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_opened_tables',
        opened_tables,
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_table_cache_hit_rate',
        hit_rate,
        uom='%',
        warn=args.WARNING,
        crit=args.CRITICAL,
        _min=0,
        _max=100,
    )

    # Per-CONTRIBUTING: emit cache hits/misses as per-second deltas (computed
    # in-plugin against a local SQLite cache) instead of cumulative `c`
    # counters that force Grafana to do non_negative_difference() per panel.
    # Only available when the server exposes the modern counters.
    if has_modern_counters:
        rates = lib.db_sqlite.per_second_deltas(
            SQLITE_DB,
            'mysql-table-cache',
            {
                'table_open_cache_hits': cache_hits,
                'table_open_cache_misses': cache_misses,
            },
        )
        if rates is not None:
            perfdata += lib.base.get_perfdata(
                'mysql_table_open_cache_hits_per_second',
                round(rates['table_open_cache_hits'], 2),
                _min=0,
            )
            perfdata += lib.base.get_perfdata(
                'mysql_table_open_cache_misses_per_second',
                round(rates['table_open_cache_misses'], 2),
                _min=0,
            )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
