#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import glob
import os
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.txt
import lib.version
from lib.globals import STATE_OK, STATE_UNKNOWN, STATE_WARN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051203'

DESCRIPTION = """Audits MySQL/MariaDB configuration knobs that have a measurable impact
on performance or correctness but no metric of their own. Highlights settings that
silently waste work on tooling-heavy hosts, drop a transaction-isolation guarantee an
admin probably expected, are tuned for the wrong storage class, or hang on as no-op
leftovers in `my.cnf` until the next major upgrade refuses to start with them.

What gets checked:
- `innodb_stats_on_metadata`: when ON, InnoDB refreshes index statistics on every
  `information_schema` query. Hosts with frequent dashboard, backup and
  monitoring queries pay a noticeable CPU cost for this. Recommended OFF.
- `concurrent_insert`: when set to `NEVER` / `0`, MyISAM tables can no longer serve
  SELECTs in parallel with INSERTs. `AUTO` (the modern default) is recommended.
- `innodb_snapshot_isolation` (MariaDB only): under `REPEATABLE-READ`, OFF lets a
  transaction see writes other transactions commit during its lifetime, breaking the
  stable-snapshot guarantee the name `REPEATABLE-READ` implies. ON makes the
  snapshot stable. Default flipped to ON in MariaDB 11.8; before that, the admin had
  to opt in. Other isolation levels and non-MariaDB servers skip this check.
- `innodb_flush_neighbors`: HDD wins from grouping seek-adjacent dirty-page flushes,
  SSD/NVMe pays in extra writes for no latency benefit. Recommended `0` on SSD/NVMe,
  `1` (or `2`) on HDD.
- `innodb_io_capacity`: caps InnoDB's background flushing rate. Default `200` is
  sized for HDD; SSD/NVMe deployments should raise this to `2000` or more so InnoDB
  can flush closer to the disk's IOPS budget.
- Deprecated configuration variables explicitly set via `my.cnf` or `SET GLOBAL`.
  The server tolerates these as no-ops, so they hide easily until the next major
  upgrade ships without them. Compile-time defaults are silent. MariaDB exposes a
  runtime deprecation flag; on MySQL a static list filtered through
  `performance_schema.variables_info.VARIABLE_SOURCE` catches the same cases.

The two storage-aware checks (`innodb_flush_neighbors`, `innodb_io_capacity`) need
to know the disk type. Auto-detection reads `/sys/block/*/queue/rotational` on the
same host the plugin runs on; when run from a remote monitoring host via TCP the
auto-detect cannot see the database server's storage, so pass `--storage-type=ssd`
or `--storage-type=hdd` explicitly, or `--storage-type=skip` to disable those two
checks entirely."""

# Variables MySQL has removed or kept as a no-op. Port of mysqltuner.pl v2.8.41
# check_removed_innodb_variables (MySQL branch). Used only on the MySQL path;
# MariaDB has its own runtime deprecation flag and needs no static list.
REMOVED_VARS_MYSQL = [
    {
        'name': 'innodb_file_format',
        'removed_in': (8, 0, 0),
        'note': 'Barracuda is the only supported format.',
    },
    {
        'name': 'innodb_file_format_check',
        'removed_in': (8, 0, 0),
        'note': 'Barracuda is the only supported format.',
    },
    {
        'name': 'innodb_file_format_max',
        'removed_in': (8, 0, 0),
        'note': 'Barracuda is the only supported format.',
    },
    {
        'name': 'innodb_large_prefix',
        'removed_in': (8, 0, 0),
        'note': 'Always enabled for Barracuda.',
    },
    {
        'name': 'innodb_locks_unsafe_for_binlog',
        'removed_in': (8, 0, 0),
        'note': 'Use READ COMMITTED isolation level instead.',
    },
    {
        'name': 'innodb_support_xa',
        'removed_in': (8, 0, 0),
        'note': 'XA support is now always enabled.',
    },
    {
        'name': 'innodb_undo_logs',
        'removed_in': (8, 0, 0),
        'note': 'Replaced by `innodb_rollback_segments`.',
    },
    {
        'name': 'tx_isolation',
        'removed_in': (8, 0, 0),
        'note': 'Use `transaction_isolation` instead.',
    },
    {
        'name': 'tx_read_only',
        'removed_in': (8, 0, 0),
        'note': 'Use `transaction_read_only` instead.',
    },
    {
        'name': 'innodb_log_file_size',
        'removed_in': (9, 0, 0),
        'note': 'Replaced by `innodb_redo_log_capacity`.',
    },
    {
        'name': 'innodb_log_files_in_group',
        'removed_in': (9, 0, 0),
        'note': 'Replaced by `innodb_redo_log_capacity`.',
    },
    {
        'name': 'innodb_undo_tablespaces',
        'removed_in': (9, 0, 0),
        'note': 'Managed automatically now.',
    },
]

DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
DEFAULT_STORAGE_TYPE = 'auto'
DEFAULT_TIMEOUT = 3

# Block devices that do not represent persistent storage and would
# either return rotational=0 (incorrect for SSD) or skew Mixed
# detection. Mirrors the skip list in mysqltuner.pl.
_TRANSIENT_BLOCK_PREFIXES = ('loop', 'ram', 'nbd', 'zram')


def parse_args():
    """Parse command line arguments using argparse."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '--defaults-file',
        help='MySQL/MariaDB cnf file to read user, host and password from. '
        'Example: `--defaults-file=/var/spool/icinga2/.my.cnf`. '
        'Default: %(default)s',
        dest='DEFAULTS_FILE',
        default=DEFAULT_DEFAULTS_FILE,
    )

    parser.add_argument(
        '--defaults-group',
        help=lib.args.help('--defaults-group') + ' Default: %(default)s',
        dest='DEFAULTS_GROUP',
        default=DEFAULT_DEFAULTS_GROUP,
    )

    parser.add_argument(
        '--storage-type',
        help='Storage type of the MySQL data directory. Drives the '
        '`innodb_flush_neighbors` and `innodb_io_capacity` checks. '
        '`auto` reads `/sys/block/*/queue/rotational` on the same host; '
        '`ssd` or `hdd` overrides the detection (use when running from a '
        'remote monitoring host via TCP); `skip` disables both storage-type '
        'checks entirely. '
        'Example: `--storage-type=ssd`. '
        'Default: %(default)s',
        dest='STORAGE_TYPE',
        choices=('auto', 'ssd', 'hdd', 'skip'),
        default=DEFAULT_STORAGE_TYPE,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    args, _ = parser.parse_known_args()
    return args


def detect_storage_type(override):
    """Return one of ('ssd', 'hdd', 'mixed', 'unknown') describing the
    block-device class behind the host's data directory. `override` is
    the CLI argument value: 'ssd' / 'hdd' force the answer, 'skip'
    short-circuits to 'unknown', 'auto' walks `/sys/block`.
    Auto-detection only works on Linux when the plugin runs on the
    MySQL host itself (typically via a local monitoring agent). When
    `/sys/block` is not readable - remote check via TCP, Windows,
    container without `/sys/block` mounted - the result is 'unknown'
    and the downstream storage-aware checks are skipped silently.
    Logic mirrors mysqltuner.
    """
    if override in ('ssd', 'hdd'):
        return override
    if override == 'skip':
        return 'unknown'
    # 'auto' from here on.
    if not os.path.isdir('/sys/block'):
        return 'unknown'
    has_ssd = False
    has_hdd = False
    for block in glob.glob('/sys/block/*'):
        name = os.path.basename(block)
        if name.startswith(_TRANSIENT_BLOCK_PREFIXES):
            continue
        rotational_path = os.path.join(block, 'queue', 'rotational')
        try:
            with open(rotational_path, 'r', encoding='ascii') as f:
                rotational = f.read().strip()
        except (OSError, ValueError):
            continue
        if rotational == '0':
            has_ssd = True
        elif rotational == '1':
            has_hdd = True
    if has_ssd and not has_hdd:
        return 'ssd'
    if has_hdd and not has_ssd:
        return 'hdd'
    if has_ssd and has_hdd:
        return 'mixed'
    return 'unknown'


def find_deprecated_variables_mariadb(conn):
    """Return the list of deprecated variables that the admin has
    explicitly set (in `my.cnf` or via `SET GLOBAL`). MariaDB itself
    flags deprecated entries via `VARIABLE_COMMENT LIKE '%deprecated%'`
    in `INFORMATION_SCHEMA.SYSTEM_VARIABLES`. `GLOBAL_VALUE_ORIGIN` tells
    us where the value came from; `COMPILE-TIME` means default (silent),
    `CONFIG` / `SQL` means the admin set it (alert). Per the MariaDB
    feature-deprecation policy, deprecated startup variables stay
    visible as no-ops for the full LTS support window (5+ years) so this
    runtime check catches stray `/etc/my.cnf.d/*.cnf` entries left over
    from previous distros.
    """
    sql = """
        select VARIABLE_NAME as name,
            GLOBAL_VALUE as value,
            GLOBAL_VALUE_ORIGIN as origin,
            VARIABLE_COMMENT as note
        from INFORMATION_SCHEMA.SYSTEM_VARIABLES
        where (
            VARIABLE_COMMENT like 'Deprecated%'
            or VARIABLE_COMMENT like '%deprecated%'
        )
        and GLOBAL_VALUE_ORIGIN in ('CONFIG', 'SQL')
        ;
    """
    success, rows = lib.db_mysql.select(conn, sql)
    return rows if success else []


def find_deprecated_variables_mysql(conn, myvar):
    """Return the list of removed/deprecated MySQL variables that the
    admin has explicitly set. MySQL does not expose a deprecation flag at
    runtime, so we walk a static list ported from MySQLTuner and
    filter through `performance_schema.variables_info.VARIABLE_SOURCE` to
    skip compile-time defaults. On MySQL releases without
    `variables_info` (pre-8.0) we fall back to plain presence in
    `SHOW VARIABLES`, which matches mysqltuner's original behaviour.
    The `removed_in` version on each entry gates the finding so a still-
    valid pre-removal release (e.g. `innodb_file_format` on MySQL 5.7)
    does not produce a false positive.
    """
    running = lib.version.version((myvar.get('version') or '').lower())

    sources = {}
    success, rows = lib.db_mysql.select(
        conn,
        'select VARIABLE_NAME as name, VARIABLE_SOURCE as source '
        'from performance_schema.variables_info',
    )
    if success:
        sources = {r['name'].lower(): r['source'] for r in rows}

    findings = []
    for entry in REMOVED_VARS_MYSQL:
        if entry['name'] not in myvar:
            continue
        if running < entry['removed_in']:
            # Variable still exists in the running MySQL version and may
            # be a legitimate knob; mysqltuner-aligned skip.
            continue
        source = sources.get(entry['name'].lower())
        # If variables_info is available and the source is COMPILED, the
        # variable is at its compile-time default - silent. If
        # variables_info is unavailable (MySQL < 8.0), report on presence
        # alone (mysqltuner-aligned).
        if source == 'COMPILED':
            continue
        findings.append({
            'name': entry['name'],
            'value': myvar[entry['name']],
            'origin': source or 'unknown',
            'note': entry['note'],
        })
    return findings


def main():
    """The main function. This is where the magic happens."""

    # logic taken from mysqltuner.pl:check_metadata_perf(), the
    # concurrent_insert check in mysql_stats() and
    # check_removed_innodb_variables(), verified in sync with
    # MySQLTuner

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    mysql_connection = {
        'defaults_file': args.DEFAULTS_FILE,
        'defaults_group': args.DEFAULTS_GROUP,
        'timeout': args.TIMEOUT,
    }
    conn = lib.base.coe(lib.db_mysql.connect(mysql_connection))
    lib.base.coe(lib.db_mysql.check_privileges(conn))

    myvar = lib.db_mysql.get_all_variables(conn)
    is_mariadb = 'mariadb' in (myvar.get('version') or '').lower()
    if is_mariadb:
        removed_vars = find_deprecated_variables_mariadb(conn)
    else:
        removed_vars = find_deprecated_variables_mysql(conn, myvar)
    lib.db_mysql.close(conn)

    # Block-device class behind the data directory. Drives the two
    # SSD-aware checks below. Detected once outside the SQL session.
    storage_type = detect_storage_type(args.STORAGE_TYPE)

    # init some vars
    # Perfdata encoding for the two categorical knobs:
    # - innodb_stats_on_metadata: 0 = OFF (good), 1 = ON (warn).
    # - concurrent_insert: 0 = NEVER/0/OFF (warn), 1 = AUTO/1, 2 = ALWAYS/2.
    # Numeric encoding lets Grafana plot config drift and Icinga's perfdata
    # band thresholds (warn/crit) light up the bad value automatically.
    concurrent_insert_map = {
        'NEVER': 0, '0': 0, 'OFF': 0,
        'AUTO': 1, '1': 1,
        'ALWAYS': 2, '2': 2,
    }
    state = STATE_OK
    sections = []
    facts = []
    # All recommendations from all WARN paths land here and render once at the
    # end as a `Recommendations:\n* ...` bulleted block, regardless of which
    # combinations of WARN paths fire.
    recommendations = []
    perfdata = ''

    # analyze data
    # innodb_stats_on_metadata: ON triggers InnoDB stats updates whenever
    # information_schema is queried, which is frequent on tooling-heavy hosts.
    innodb_stats = myvar.get('innodb_stats_on_metadata')
    if innodb_stats is not None:
        if innodb_stats == 'ON':
            state = lib.base.get_worst(state, STATE_WARN)
            facts.append(
                f'`innodb_stats_on_metadata` is `ON`'
                f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
            )
            recommendations.append(
                'Set `innodb_stats_on_metadata` = `OFF` so InnoDB stops'
                ' refreshing index statistics on every `information_schema`'
                ' query'
            )
        else:
            facts.append(f'`innodb_stats_on_metadata` is `{innodb_stats}`')
        perfdata += lib.base.get_perfdata(
            'mysql_innodb_stats_on_metadata',
            1 if innodb_stats == 'ON' else 0,
            warn=1,
            _min=0,
            _max=1,
        )

    # concurrent_insert: NEVER / 0 disables parallel SELECT + INSERT on MyISAM
    # tables. AUTO (the modern default) is recommended.
    conc_ins = myvar.get('concurrent_insert')
    if conc_ins is not None:
        conc_ins_num = concurrent_insert_map.get(conc_ins.upper())
        if conc_ins_num == 0:
            state = lib.base.get_worst(state, STATE_WARN)
            facts.append(
                f'`concurrent_insert` is `{conc_ins}`'
                f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
            )
            recommendations.append(
                'Set `concurrent_insert` = `AUTO` (or `ALWAYS`) so MyISAM'
                ' tables can serve SELECTs in parallel with INSERTs'
            )
        else:
            facts.append(f'`concurrent_insert` is `{conc_ins}`')
        if conc_ins_num is not None:
            # Treat 0 (NEVER) as the warning band so Icinga colours perfdata
            # graphs correctly. Min=0 / max=2 keeps the gauge to the documented
            # enum range.
            perfdata += lib.base.get_perfdata(
                'mysql_concurrent_insert',
                conc_ins_num,
                warn='1:',
                _min=0,
                _max=2,
            )

    # innodb_snapshot_isolation (MariaDB 11.x): with the variable OFF and
    # the global isolation level set to REPEATABLE-READ, transactions can
    # see other transactions' commits during their lifetime. mysqltuner
    # only flags this combination; other isolation levels and servers
    # without the variable (MySQL, older MariaDB) skip silently.
    snapshot_iso = myvar.get('innodb_snapshot_isolation')
    if snapshot_iso is not None:
        transaction_iso = (
            myvar.get('transaction_isolation')
            or myvar.get('tx_isolation')
            or ''
        )
        if (
            snapshot_iso == 'OFF'
            and transaction_iso == 'REPEATABLE-READ'
        ):
            state = lib.base.get_worst(state, STATE_WARN)
            facts.append(
                '`innodb_snapshot_isolation` is `OFF` with '
                '`REPEATABLE-READ`'
                f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
            )
            recommendations.append(
                'Set `innodb_snapshot_isolation = ON` so `REPEATABLE-READ` '
                'transactions get a stable snapshot independent of concurrent '
                'commits'
            )
        else:
            facts.append(
                f'`innodb_snapshot_isolation` is `{snapshot_iso}` '
                f'(isolation `{transaction_iso}`)'
            )

    # innodb_flush_neighbors: 0 is right for SSD/NVMe (no seek savings),
    # 1 or 2 is right for HDD (group adjacent dirty-page flushes). Skip
    # silently on 'mixed' / 'unknown' storage and when the variable is
    # not exposed by the server.
    flush_neighbors = myvar.get('innodb_flush_neighbors')
    if flush_neighbors is not None and storage_type in ('ssd', 'hdd'):
        try:
            flush_val = int(flush_neighbors)
        except (TypeError, ValueError):
            flush_val = None
        if flush_val is not None:
            if storage_type == 'ssd' and flush_val != 0:
                state = lib.base.get_worst(state, STATE_WARN)
                facts.append(
                    f'`innodb_flush_neighbors` is `{flush_val}` on SSD/NVMe'
                    f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
                )
                recommendations.append(
                    'Set `innodb_flush_neighbors = 0` (recommended for SSD/NVMe)'
                )
            elif storage_type == 'hdd' and flush_val == 0:
                state = lib.base.get_worst(state, STATE_WARN)
                facts.append(
                    '`innodb_flush_neighbors` is `0` on HDD'
                    f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
                )
                recommendations.append(
                    'Set `innodb_flush_neighbors = 1` or `2` (recommended for HDD)'
                )
            else:
                facts.append(
                    f'`innodb_flush_neighbors` is `{flush_val}` on '
                    f'{storage_type.upper() if storage_type == "ssd" else "HDD"}'
                )

    # innodb_io_capacity: default 200 is HDD-sized; SSD/NVMe should
    # usually be 2000 or more so InnoDB does not under-utilise the disk.
    io_capacity = myvar.get('innodb_io_capacity')
    if io_capacity is not None and storage_type == 'ssd':
        try:
            io_val = int(io_capacity)
        except (TypeError, ValueError):
            io_val = None
        if io_val is not None and io_val <= 200:
            state = lib.base.get_worst(state, STATE_WARN)
            facts.append(
                f'`innodb_io_capacity` is `{io_val}` on SSD/NVMe'
                f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
            )
            recommendations.append(
                'Set `innodb_io_capacity >= 2000` (recommended for SSD/NVMe)'
            )
        elif io_val is not None:
            facts.append(
                f'`innodb_io_capacity` is `{io_val}` on SSD/NVMe'
            )

    # removed/deprecated variables that the server still exposes - typically
    # stray entries in /etc/my.cnf.d/*.cnf left over from a previous distro
    # version that the server tolerates as a no-op.
    if removed_vars:
        state = lib.base.get_worst(state, STATE_WARN)
        names = ', '.join(f'`{v["name"].lower()}`' for v in removed_vars)
        facts.append(
            f'{len(removed_vars)} deprecated config '
            f'{lib.txt.pluralize("variable", len(removed_vars))} '
            f'explicitly set ({names})'
            f'{lib.base.state2str(STATE_WARN, prefix=" ")}'
        )
        for v in removed_vars:
            origin = v.get('origin') or 'unknown'
            note = (v.get('note') or '').strip()
            # Skip the trailing note entirely when the upstream
            # VARIABLE_COMMENT is empty, so we do not leave a dangling
            # semicolon in the output.
            detail = f'source `{origin}`'
            if note:
                detail = f'{detail}; {note}'
            recommendations.append(
                f'Remove `{v["name"].lower()}` from your `/etc/my.cnf.d/*.cnf` '
                f'({detail})'
            )
    perfdata += lib.base.get_perfdata(
        'mysql_deprecated_config_variables',
        len(removed_vars),
        warn='0',
        _min=0,
    )

    # build the message
    if state == STATE_OK:
        # "Everything is ok." leads the OK message so the admin sees the
        # verdict first; the verified facts follow.
        if facts:
            sections.append('Everything is ok. ' + '. '.join(facts) + '.')
        else:
            sections.append('Everything is ok.')
    else:
        sections.append('. '.join(facts) + '.')

    if recommendations:
        sections.append(
            'Recommendations:\n' + '\n'.join(f'* {r}' for r in recommendations)
        )

    msg = '\n\n'.join(sections)

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
