#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.human
from lib.globals import STATE_OK, STATE_UNKNOWN, STATE_WARN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051005'

DESCRIPTION = """Checks index sizes, fragmentation, and consistent engine and collation usage across
all schemas in MySQL/MariaDB. Alerts on mixed storage engines or collations within a
single schema."""

DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
DEFAULT_IGNORE_SCHEMAS = ''
DEFAULT_IGNORE_TABLES = ''
DEFAULT_LENGTHY = False
DEFAULT_TIMEOUT = 3

# System schemas: skipped during all aggregates and per-schema checks. mysqltuner
# excludes the same set; `percona` is added because the Percona Server Toolkit installs a
# `percona` schema for its own bookkeeping and it is not user data.
SYSTEM_SCHEMAS = ('information_schema', 'mysql', 'percona', 'performance_schema', 'sys')


def parse_args():
    """Parse command line arguments using argparse."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '--defaults-file',
        help='MySQL/MariaDB cnf file to read user, host and password '
        'from (instead of specifying them on the command line). '
        'Example: `/var/spool/icinga2/.my.cnf`. Default: %(default)s',
        dest='DEFAULTS_FILE',
        default=DEFAULT_DEFAULTS_FILE,
    )

    parser.add_argument(
        '--defaults-group',
        help=lib.args.help('--defaults-group') + ' Default: %(default)s',
        dest='DEFAULTS_GROUP',
        default=DEFAULT_DEFAULTS_GROUP,
    )

    parser.add_argument(
        '--ignore-schemas',
        help='Regular expression matched against `SCHEMA_NAME` (case-sensitive). Schemas '
        'whose name matches are skipped entirely (no aggregate contribution, no checks). '
        'Useful for known-mixed schemas that the admin cannot or does not want to fix '
        '(common with Icinga Director / Icinga Web 2 / Icinga DB schemas, which mix '
        'utf8 / utf8mb4 collations by design). System schemas are skipped unconditionally. '
        'Default: %(default)s. '
        'Example: `--ignore-schemas="^(icinga_director|icingaweb2|icingadb)$"`',
        dest='IGNORE_SCHEMAS',
        default=DEFAULT_IGNORE_SCHEMAS,
    )

    parser.add_argument(
        '--ignore-tables',
        help='Regular expression matched against `TABLE_NAME` (case-sensitive). Tables '
        'whose name matches are excluded from every aggregate and every per-schema check. '
        'Useful for muting noisy temporary or backup tables that legitimately differ from '
        'the schema-wide engine/collation. '
        'Default: %(default)s. '
        'Example: `--ignore-tables="^(tmp_|backup_)"`',
        dest='IGNORE_TABLES',
        default=DEFAULT_IGNORE_TABLES,
    )

    parser.add_argument(
        '--lengthy',
        help=lib.args.help('--lengthy') + ' Default: %(default)s',
        dest='LENGTHY',
        action='store_true',
        default=DEFAULT_LENGTHY,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    args, _ = parser.parse_known_args()
    return args


def main():
    """The main function. This is where the magic happens."""

    # logic taken from mysqltuner.pl:mysql_databases(), verified in sync with
    # MySQLTuner (the per-database engine/collation/charset consistency
    # checks and the index-vs-data-size check are unchanged upstream since the
    # original port).
    #
    # Intentional deviation: the index-vs-data-size check additionally requires
    # one of the two sizes to exceed 10 MB. Tiny schemas (under a few MB)
    # routinely have proportionally larger indices than data, which is not
    # actionable for an admin and would generate constant noise.

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    mysql_connection = {
        'defaults_file': args.DEFAULTS_FILE,
        'defaults_group': args.DEFAULTS_GROUP,
        'timeout': args.TIMEOUT,
    }
    conn = lib.base.coe(lib.db_mysql.connect(mysql_connection))
    lib.base.coe(lib.db_mysql.check_privileges(conn, 'SELECT'))

    # init some vars
    state = STATE_OK
    perfdata = ''

    # The --ignore-tables and --ignore-schemas regexes are interpolated into the SQL
    # string. The values come from the CLI (admin-controlled), and any embedded single
    # quotes are escaped to keep the surrounding SQL literal valid. MySQL itself rejects
    # malformed regex patterns at query time and `lib.base.coe()` surfaces the error.
    ignore_tables_clause = ''
    if args.IGNORE_TABLES:
        safe_regex = args.IGNORE_TABLES.replace("'", "''")
        ignore_tables_clause = f" and TABLE_NAME NOT REGEXP '{safe_regex}'"
    ignore_schemas_clause = ''
    if args.IGNORE_SCHEMAS:
        safe_regex = args.IGNORE_SCHEMAS.replace("'", "''")
        ignore_schemas_clause = f" and SCHEMA_NAME NOT REGEXP '{safe_regex}'"

    # SCHEMA_NAME and others must be uppercase due to MySQL 8+
    excluded = ', '.join(f'"{s}"' for s in SYSTEM_SCHEMAS)
    sql = f'''
        select SCHEMA_NAME
        from information_schema.schemata
        where SCHEMA_NAME not in ({excluded})
            {ignore_schemas_clause};
    '''  # nosec B608
    dblist = lib.base.coe(lib.db_mysql.select(conn, sql))

    results = {
        'index': '',
        'engine': '',
        'collation': '',
        'colcharset': '',
        'colcollation': '',
    }
    empty_schemas = []
    per_schema_rows = []
    total_data_size = 0
    total_index_size = 0
    total_rows = 0
    total_tables = 0

    # schema names come from MySQL itself (information_schema.schemata), not from user input
    for schema in dblist:
        schema_name = schema['SCHEMA_NAME']
        sql = f'''
            select TABLE_SCHEMA,
                sum(TABLE_ROWS) as sum_rows,
                sum(DATA_LENGTH) as sum_data,
                sum(INDEX_LENGTH) as sum_index,
                sum(DATA_LENGTH+INDEX_LENGTH) as sum_data_index,
                count(distinct ENGINE) as cnt_engines,
                count(TABLE_NAME) as cnt_tables,
                count(distinct(TABLE_COLLATION)) as cnt_collations
            from information_schema.tables
            where TABLE_SCHEMA="{schema_name}"
                {ignore_tables_clause}
            group by TABLE_SCHEMA
            order by TABLE_SCHEMA;
        '''  # nosec B608
        dbinfo = lib.base.coe(lib.db_mysql.select(conn, sql, fetchone=True))
        if dbinfo is None:
            empty_schemas.append(schema_name)
            continue

        # MySQL returns SQL NULL as Python None (not the string "NULL"); use is/is not
        # None for the guards.
        sum_data = dbinfo['sum_data']
        sum_index = dbinfo['sum_index']
        sum_rows = dbinfo['sum_rows']
        sum_total = dbinfo['sum_data_index']
        cnt_tables = dbinfo['cnt_tables']

        total_data_size += int(sum_data or 0)
        total_index_size += int(sum_index or 0)
        total_rows += int(sum_rows or 0)
        total_tables += int(cnt_tables or 0)

        if (
            sum_data is not None
            and sum_index is not None
            and (sum_data > 10 * 1024 * 1024 or sum_index > 10 * 1024 * 1024)
            and sum_data < sum_index
        ):
            idx_h = lib.human.bytes2human(sum_index)
            data_h = lib.human.bytes2human(sum_data)
            results['index'] += f'{schema_name} ({idx_h} / {data_h}), '

        if dbinfo['cnt_engines'] > 1:
            results['engine'] += f'{schema_name} ({dbinfo["cnt_engines"]}x), '

        if dbinfo['cnt_collations'] > 1:
            results['collation'] += f'{schema_name} ({dbinfo["cnt_collations"]}x), '

        sql = f'''
            select distinct(CHARACTER_SET_NAME)
            from information_schema.COLUMNS
            where CHARACTER_SET_NAME is not null
                and TABLE_SCHEMA = "{schema_name}"
                {ignore_tables_clause};
        '''  # nosec B608
        distinct_column_charset = lib.base.coe(lib.db_mysql.select(conn, sql))
        if len(distinct_column_charset) > 1:
            results['colcharset'] += (
                f'{schema_name} ({len(distinct_column_charset)}x), '
            )

        sql = f'''
            select distinct(COLLATION_NAME)
            from information_schema.COLUMNS
            where COLLATION_NAME is not null
                and TABLE_SCHEMA = "{schema_name}"
                {ignore_tables_clause};
        '''  # nosec B608
        distinct_column_collation = lib.base.coe(lib.db_mysql.select(conn, sql))
        if len(distinct_column_collation) > 1:
            results['colcollation'] += (
                f'{schema_name} ({len(distinct_column_collation)}x), '
            )

        # Collect per-schema details for the optional --lengthy table.
        sql = f'''
            select distinct ENGINE
            from information_schema.tables
            where TABLE_SCHEMA="{schema_name}"
                and ENGINE is not null
                {ignore_tables_clause};
        '''  # nosec B608
        engine_rows = lib.base.coe(lib.db_mysql.select(conn, sql))
        engines_list = sorted(r['ENGINE'] for r in engine_rows)

        sql = f'''
            select distinct TABLE_COLLATION
            from information_schema.tables
            where TABLE_SCHEMA="{schema_name}"
                and TABLE_COLLATION is not null
                {ignore_tables_clause};
        '''  # nosec B608
        tcoll_rows = lib.base.coe(lib.db_mysql.select(conn, sql))
        tcoll_list = sorted(r['TABLE_COLLATION'] for r in tcoll_rows)

        ccharset_list = sorted(r['CHARACTER_SET_NAME'] for r in distinct_column_charset)
        ccoll_list = sorted(r['COLLATION_NAME'] for r in distinct_column_collation)

        # Compact issue summary for the default (non-lengthy) table.
        issues = []
        if dbinfo['cnt_engines'] > 1:
            issues.append(f'{dbinfo["cnt_engines"]} engines')
        if len(tcoll_list) > 1:
            issues.append(f'{len(tcoll_list)} table collations')
        if len(ccharset_list) > 1:
            issues.append(f'{len(ccharset_list)} column charsets')
        if len(ccoll_list) > 1:
            issues.append(f'{len(ccoll_list)} column collations')

        per_schema_rows.append({
            'schema': schema_name,
            'tables': cnt_tables or 0,
            'rows': sum_rows or 0,
            'data': lib.human.bytes2human(sum_data or 0),
            'index': lib.human.bytes2human(sum_index or 0),
            'total': lib.human.bytes2human(sum_total or 0),
            'engines': ', '.join(engines_list) or '-',
            'table_coll': ', '.join(tcoll_list) or '-',
            'col_charsets': ', '.join(ccharset_list) or '-',
            'col_coll': ', '.join(ccoll_list) or '-',
            'issues': ', '.join(issues) if issues else 'OK',
        })

    lib.db_mysql.close(conn)

    # Build the message in independent sections, then join with a single blank line
    # between each. This guarantees no `\n\n\n` (double blank lines) appear in the
    # final output regardless of which optional sections fire.

    # Section 1: header + bullet findings, or the OK summary
    bullets = []
    if results['index']:
        bullets.append(f'* Index size is larger than data size: {results["index"][:-2]}')
    if results['engine']:
        bullets.append(
            f'* Mixed storage engines (use one engine for all tables in a schema):'
            f' {results["engine"][:-2]}'
        )
    if results['collation']:
        bullets.append(
            f'* Mixed table collations (use one collation for all tables in a schema):'
            f' {results["collation"][:-2]}'
        )
    if results['colcharset']:
        bullets.append(
            f'* Mixed column charsets (use one charset for all text-like columns'
            f' if possible): {results["colcharset"][:-2]}'
        )
    if results['colcollation']:
        bullets.append(
            f'* Mixed column collations (use one collation for all text-like columns'
            f' if possible): {results["colcollation"][:-2]}'
        )

    sections = []
    if bullets:
        state = STATE_WARN
        sections.append('There are warnings.\n\n' + '\n'.join(bullets))
    else:
        # "Everything is ok." leads so admins see the verdict first; the
        # scanned-scope details follow. Format mirrors mysqltuner's
        # per-database stats but condensed to a single line.
        sections.append(
            f'Everything is ok.'
            f' {len(dblist)} user schema(s) scanned,'
            f' {total_tables} table(s),'
            f' {lib.human.number2human(total_rows)} rows,'
            f' {lib.human.bytes2human(total_data_size)} data,'
            f' {lib.human.bytes2human(total_index_size)} indices.'
        )

    # Section 2: empty-schemas info (common: lazy-init apps, fresh installs,
    # migration leftovers); does not change state, mirroring our "alert on the
    # actionable" stance.
    if empty_schemas:
        sections.append(
            f'Note: {len(empty_schemas)} empty schema(s) (no tables):'
            f' {", ".join(empty_schemas)}.'
        )

    # Section 3: per-schema breakdown table. Without --lengthy: compact summary
    # (Schema | Tables | Total Size | Issues). With --lengthy: full breakdown,
    # mirroring the per-database information mysqltuner emits when invoked with
    # `--dbstat`.
    if per_schema_rows:
        if args.LENGTHY:
            keys = [
                'schema', 'tables', 'rows', 'data', 'index', 'total',
                'engines', 'table_coll', 'col_charsets', 'col_coll',
            ]
            headers = [
                'Schema', 'Tables', 'Rows', 'Data', 'Index', 'Total',
                'Engines', 'Table Collations', 'Column Charsets', 'Column Collations',
            ]
        else:
            keys = ['schema', 'tables', 'total', 'issues']
            headers = ['Schema', 'Tables', 'Size', 'Issues']
        sections.append(lib.base.get_table(per_schema_rows, keys, header=headers))

    msg = '\n\n'.join(sections)

    perfdata += lib.base.get_perfdata('mysql_database_count', len(dblist), _min=0)
    perfdata += lib.base.get_perfdata(
        'mysql_total_data_size',
        total_data_size,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_total_index_size',
        total_index_size,
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata('mysql_total_rows', total_rows, _min=0)
    perfdata += lib.base.get_perfdata('mysql_total_tables', total_tables, _min=0)

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
