#!/usr/lib64/linuxfabrik-monitoring-plugins/venv/bin/python
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import os
import re
import sys

import lib.args
import lib.base
import lib.db_mysql
import lib.human
from lib.globals import STATE_OK, STATE_UNKNOWN, STATE_WARN

try:
    import psutil

    HAVE_PSUTIL = True
except ImportError:
    HAVE_PSUTIL = False


__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026051101'

DESCRIPTION = """Estimates MySQL/MariaDB memory consumption and compares it to the host's
physical RAM. Reports the currently-reached usage (server-wide buffers + per-thread
buffers * `Max_used_connections` + Performance Schema memory + Galera GCache if
present) and the theoretical worst-case peak (same formula but with `max_connections`).
Also surfaces the RAM consumed by non-database processes on the host so admins can
see when MySQL plus the rest of the system would exceed physical memory.
Alerts when used or peak memory crosses the `--warning` / `--critical` thresholds,
when peak + other-process memory exceeds physical RAM, and when MySQL would allocate
more than 2 GiB on a 32-bit system."""

DEFAULT_DEFAULTS_FILE = '/var/spool/icinga2/.my.cnf'
DEFAULT_DEFAULTS_GROUP = 'client'
DEFAULT_TIMEOUT = 3
# % of physical RAM at which `pct_max_used_memory` (the currently-reached
# footprint) turns WARN / CRIT. Default 85/95 matches mysqltuner.
DEFAULT_WARN = '85'
DEFAULT_CRIT = '95'

# MySQL/MariaDB allocates each buffer from the 32-bit address space (~4 GiB
# practical limit; mysqltuner uses 2 GiB as the conservative threshold). On a
# 32-bit binary, anything above that risks instability.
ADDR_LIMIT_32BIT = 2 * 1024 * 1024 * 1024


def parse_args():
    """Parse command line arguments using argparse."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '-c',
        '--critical',
        help='CRIT threshold for the percentage of physical RAM consumed by '
        'the reached MySQL memory footprint (server_buffers + '
        'per_thread_buffers * Max_used_connections + Performance Schema + '
        'Galera GCache). '
        'Supports Nagios ranges. '
        'Default: %(default)s',
        dest='CRITICAL',
        default=DEFAULT_CRIT,
    )

    parser.add_argument(
        '--defaults-file',
        help='MySQL/MariaDB cnf file to read user, host and password from. '
        'Example: `--defaults-file=/var/spool/icinga2/.my.cnf`. '
        'Default: %(default)s',
        dest='DEFAULTS_FILE',
        default=DEFAULT_DEFAULTS_FILE,
    )

    parser.add_argument(
        '--defaults-group',
        help=lib.args.help('--defaults-group') + ' Default: %(default)s',
        dest='DEFAULTS_GROUP',
        default=DEFAULT_DEFAULTS_GROUP,
    )

    parser.add_argument(
        '--lengthy',
        help='Append a full memory breakdown table to the output (each '
        'contributing buffer + the calculation that produces server_buffers, '
        'per_thread_buffers, max_used_memory and max_peak_memory). Useful '
        'when a WARNING/CRITICAL fires and you need to see which buffer '
        'dominates the footprint.',
        dest='LENGTHY',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '-w',
        '--warning',
        help='WARN threshold for the percentage of physical RAM consumed by '
        'the reached MySQL memory footprint. '
        'Supports Nagios ranges. '
        'Default: %(default)s',
        dest='WARNING',
        default=DEFAULT_WARN,
    )

    args, _ = parser.parse_known_args()
    return args


def get_vars(conn):
    # Pull only the variables we actually need rather than implementing a
    # full `get_all_vars()`. SHOW VARIABLES without GLOBAL is connection-local
    # but we only read session-shared knobs here.
    sql = """
        show global variables
        where variable_name like 'aria_pagecache_buffer_size'
            or variable_name like 'innodb_additional_mem_pool_size'
            or variable_name like 'innodb_buffer_pool_size'
            or variable_name like 'innodb_log_buffer_size'
            or variable_name like 'join_buffer_size'
            or variable_name like 'key_buffer_size'
            or variable_name like 'max_allowed_packet'
            or variable_name like 'max_connections'
            or variable_name like 'max_heap_table_size'
            or variable_name like 'performance_schema'
            or variable_name like 'query_cache_size'
            or variable_name like 'read_buffer_size'
            or variable_name like 'read_rnd_buffer_size'
            or variable_name like 'sort_buffer_size'
            or variable_name like 'thread_stack'
            or variable_name like 'tmp_table_size'
            or variable_name like 'wsrep_on'
            or variable_name like 'wsrep_provider_options'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def get_status(conn):
    sql = """
        show global status
        where variable_name like 'Max_used_connections'
            ;
          """
    return lib.base.coe(lib.db_mysql.select(conn, sql))


def get_pf_memory(conn, myvar):
    """Return Performance Schema memory usage in bytes, or 0 when PFS is
    OFF / unavailable. PFS reports its own footprint as the `memory`
    row of `SHOW ENGINE PERFORMANCE_SCHEMA STATUS`. MySQL requires the
    `PROCESS` privilege for `SHOW ENGINE`; without it the query fails
    with a 1227 access-denied error. The monitoring user usually only
    has `SELECT`, so we treat that as a soft skip and report PFS
    memory as 0 rather than UNKNOWN-ing the whole plugin.
    """
    if myvar.get('performance_schema', 'OFF') != 'ON':
        return 0
    sql = 'show engine performance_schema status;'
    success, rows = lib.db_mysql.select(conn, sql)
    if not success:
        return 0
    for item in rows or []:
        if item['Type'] == 'performance_schema' and item['Name'].startswith('memory'):
            return int(item['Status'])
    return 0


def get_gcache_memory(myvar):
    """Return the Galera GCache size in bytes if this is a wsrep node with a
    parseable `gcache.size` in `wsrep_provider_options`, else 0.

    `wsrep_provider_options` looks like a long `;`-separated key=value blob:
        gmcast.listen_addr = tcp://0.0.0.0:4567; gcache.size = 128M; ...
    """
    if myvar.get('wsrep_on', 'OFF') != 'ON':
        return 0
    options = myvar.get('wsrep_provider_options') or ''
    match = re.search(r'gcache\.size\s*=\s*([^;\s]+)', options)
    if not match:
        return 0
    raw = match.group(1).strip()
    # gcache.size can be expressed with a SI/IEC suffix (e.g. "128M", "1G")
    # or as a raw byte count. lib.human.human2bytes handles both.
    try:
        return lib.human.human2bytes(raw)
    except (ValueError, TypeError):
        return 0


def get_other_process_memory():
    """Total RSS of processes that are *not* MySQL/MariaDB. Used to detect
    whether MySQL's theoretical peak plus the rest of the host's footprint
    would exceed physical RAM.
    """
    if not HAVE_PSUTIL:
        return 0
    db_names = {'mysqld', 'mariadbd'}
    total = 0
    try:
        for proc in psutil.process_iter(attrs=['name', 'memory_info']):
            name = proc.info.get('name') or ''
            if name in db_names:
                continue
            mem = proc.info.get('memory_info')
            if mem is None:
                continue
            total += mem.rss
    except psutil.NoSuchProcess:
        pass
    return total


def build_contributions(myvar, mycalc):
    """Return a list of (label, bytes, multiplied) tuples describing every
    knob's contribution to `max_peak_memory`, sorted by bytes descending.
    `multiplied` is True when the entry is a per-thread buffer multiplied by
    `max_connections` (so the label already encodes the math and the total
    bytes deserve an explicit `= total` on the right). Single-value entries
    have `multiplied=False` and the label already carries the value, so the
    renderer can omit the redundant `= value` tail.

    Used both for the top-N hint in the Recommendations block and the full
    `--lengthy` ranked breakdown.
    """
    bh = lib.human.bytes2human
    max_conn = int(myvar['max_connections'])

    def per_thread(varname):
        size = int(myvar[varname])
        return (
            f'`{varname}` ({bh(size)}) * `max_connections` ({max_conn})',
            size * max_conn,
            True,
        )

    def server_wide(label, value):
        if value == 0:
            return (f'{label}', 0, False)
        return (f'{label} ({bh(value)})', value, False)

    contributions = [
        server_wide(
            '`innodb_buffer_pool_size`',
            int(myvar.get('innodb_buffer_pool_size', 0)),
        ),
        per_thread('max_allowed_packet'),
        per_thread('sort_buffer_size'),
        per_thread('read_buffer_size'),
        per_thread('read_rnd_buffer_size'),
        per_thread('join_buffer_size'),
        per_thread('thread_stack'),
        server_wide('`key_buffer_size`', int(myvar['key_buffer_size'])),
        server_wide(
            '`aria_pagecache_buffer_size`',
            int(myvar.get('aria_pagecache_buffer_size', 0)),
        ),
        server_wide(
            '`innodb_log_buffer_size`',
            int(myvar.get('innodb_log_buffer_size', 0)),
        ),
        server_wide(
            '`innodb_additional_mem_pool_size`',
            int(myvar.get('innodb_additional_mem_pool_size', 0)),
        ),
        server_wide(
            '`query_cache_size`',
            int(myvar.get('query_cache_size', 0)),
        ),
        (
            f'`max_tmp_table_size` ({bh(mycalc["max_tmp_table_size"])},'
            f' min of `tmp_table_size` and `max_heap_table_size`)',
            mycalc['max_tmp_table_size'],
            False,
        ),
        server_wide('Performance Schema memory', mycalc['pf_memory']),
        server_wide('Galera GCache (`gcache.size`)', mycalc['gcache_memory']),
    ]
    contributions.sort(key=lambda item: -item[1])
    return contributions


def format_contribution(label, value, multiplied):
    """One bullet line for a contribution entry. Multiplied entries get an
    explicit `= total` because their label only shows the operands; single
    values have the bytes already inline in the label, so a trailing total
    would be redundant.
    """
    bh = lib.human.bytes2human
    if multiplied:
        return f'{label} = {bh(value)}'
    return label


def render_breakdown(myvar, mycalc):
    """Render the verbose --lengthy memory-breakdown block: every knob and its
    contribution to `max_peak_memory`, sorted by impact (largest first). The
    sort lets admins immediately see which knob to focus on instead of having
    to scan a categorical list.
    """
    bh = lib.human.bytes2human
    contributions = build_contributions(myvar, mycalc)

    lines = ['Memory breakdown (sorted by contribution to peak memory):', '']
    for label, value, multiplied in contributions:
        lines.append(f'  {format_contribution(label, value, multiplied)}')
    lines.append('  ' + '-' * 70)
    lines.append(f'  max_peak_memory = {bh(mycalc["max_peak_memory"])}')
    lines.append(
        f'  max_used_memory (currently reached) = {bh(mycalc["max_used_memory"])}'
    )
    lines.append(f'  Physical RAM (host) = {bh(mycalc["physical_memory"])}')
    lines.append(
        f'  Other process memory (host) = {bh(mycalc["other_process_memory"])}'
    )
    return '\n'.join(lines)


def main():
    """The main function. This is where the magic happens."""

    # logic taken from mysqltuner.pl:mysql_stats(), section "Memory usage",
    # verified in sync with MySQLTuner

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    mysql_connection = {
        'defaults_file': args.DEFAULTS_FILE,
        'defaults_group': args.DEFAULTS_GROUP,
        'timeout': args.TIMEOUT,
    }
    conn = lib.base.coe(lib.db_mysql.connect(mysql_connection))
    lib.base.coe(lib.db_mysql.check_privileges(conn))

    myvar = lib.db_mysql.lod2dict(get_vars(conn))
    mystat = lib.db_mysql.lod2dict(get_status(conn))

    # init some vars
    state = STATE_OK
    sections = []
    facts = []
    # All recommendations from all WARN/CRIT paths land here and render once
    # at the end as a `Recommendations:\n* ...` bulleted block, regardless of
    # which combinations of WARN paths fire.
    recommendations = []

    # analyze data
    mycalc = {}
    # The effective size of an internal in-memory temp table is the *minimum*
    # of these two settings (mysql docs:
    # https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_tmp_table_size).
    # The plugin used to take max() here, which over-estimated server_buffers.
    mycalc['max_tmp_table_size'] = min(
        int(myvar['tmp_table_size']),
        int(myvar['max_heap_table_size']),
    )
    mycalc['server_buffers'] = (
        int(myvar['key_buffer_size']) + mycalc['max_tmp_table_size']
    )
    mycalc['server_buffers'] += int(myvar.get('innodb_buffer_pool_size', 0))
    mycalc['server_buffers'] += int(myvar.get('innodb_additional_mem_pool_size', 0))
    mycalc['server_buffers'] += int(myvar.get('innodb_log_buffer_size', 0))
    mycalc['server_buffers'] += int(myvar.get('query_cache_size', 0))
    mycalc['server_buffers'] += int(myvar.get('aria_pagecache_buffer_size', 0))
    mycalc['per_thread_buffers'] = int(myvar['read_buffer_size'])
    mycalc['per_thread_buffers'] += int(myvar['read_rnd_buffer_size'])
    mycalc['per_thread_buffers'] += int(myvar['sort_buffer_size'])
    mycalc['per_thread_buffers'] += int(myvar['thread_stack'])
    mycalc['per_thread_buffers'] += int(myvar['max_allowed_packet'])
    mycalc['per_thread_buffers'] += int(myvar['join_buffer_size'])
    mycalc['max_total_per_thread_buffers'] = mycalc['per_thread_buffers'] * int(
        mystat['Max_used_connections']
    )
    mycalc['total_per_thread_buffers'] = mycalc['per_thread_buffers'] * int(
        myvar['max_connections']
    )

    mycalc['pf_memory'] = get_pf_memory(conn, myvar)
    mycalc['gcache_memory'] = get_gcache_memory(myvar)
    mycalc['max_used_memory'] = (
        mycalc['server_buffers']
        + mycalc['max_total_per_thread_buffers']
        + mycalc['pf_memory']
        + mycalc['gcache_memory']
    )
    mycalc['max_peak_memory'] = (
        mycalc['server_buffers']
        + mycalc['total_per_thread_buffers']
        + mycalc['pf_memory']
        + mycalc['gcache_memory']
    )
    mycalc['physical_memory'] = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
    mycalc['pct_max_used_memory'] = round(
        mycalc['max_used_memory'] / mycalc['physical_memory'] * 100, 1
    )
    mycalc['pct_max_physical_memory'] = round(
        mycalc['max_peak_memory'] / mycalc['physical_memory'] * 100, 1
    )
    mycalc['other_process_memory'] = get_other_process_memory()

    lib.db_mysql.close(conn)

    # build the message
    bh = lib.human.bytes2human

    # used memory (currently reached)
    if not lib.base.IS_64BIT and mycalc['max_used_memory'] > ADDR_LIMIT_32BIT:
        used_state = STATE_WARN
        recommendations.append(
            f'Reduce MySQL buffer sizes: > {bh(ADDR_LIMIT_32BIT)} of allocations'
            f' on a 32-bit binary can cause instability'
        )
    else:
        used_state = lib.base.get_state(
            mycalc['pct_max_used_memory'],
            args.WARNING,
            args.CRITICAL,
            _operator='range',
        )
        if used_state != STATE_OK:
            recommendations.append(
                'Reduce your overall MySQL memory footprint for system stability'
            )
    state = lib.base.get_worst(state, used_state)
    facts.append(
        f'Max used memory: {mycalc["pct_max_used_memory"]}%'
        f' ({bh(mycalc["max_used_memory"])} of {bh(mycalc["physical_memory"])}'
        f' physical)'
        f'{lib.base.state2str(used_state, prefix=" ")}'
    )

    # peak memory (theoretical)
    peak_state = lib.base.get_state(
        mycalc['pct_max_physical_memory'],
        args.WARNING,
        args.CRITICAL,
        _operator='range',
    )
    if peak_state != STATE_OK:
        rec = 'Reduce your overall MySQL memory footprint for system stability'
        if rec not in recommendations:
            recommendations.append(rec)
    state = lib.base.get_worst(state, peak_state)
    facts.append(
        f'Peak possible: {mycalc["pct_max_physical_memory"]}%'
        f' ({bh(mycalc["max_peak_memory"])})'
        f'{lib.base.state2str(peak_state, prefix=" ")}'
    )

    # overcommit check (peak + other processes > physical)
    overcommit_state = STATE_OK
    overcommit_total = mycalc['max_peak_memory'] + mycalc['other_process_memory']
    if mycalc['physical_memory'] < overcommit_total:
        overcommit_state = STATE_WARN
        # Show the actual sums so the admin doesn't have to mentally compute
        # whether the numbers from the facts line above add up.
        recommendations.append(
            f'Dedicate this server to MySQL/MariaDB: peak MySQL memory'
            f' + other-process memory ({bh(mycalc["max_peak_memory"])}'
            f' + {bh(mycalc["other_process_memory"])}'
            f' = {bh(overcommit_total)}) exceeds physical RAM'
            f' ({bh(mycalc["physical_memory"])})'
        )
    state = lib.base.get_worst(state, overcommit_state)
    facts.append(
        f'Other process memory: {bh(mycalc["other_process_memory"])}'
        f'{lib.base.state2str(overcommit_state, prefix=" ")}'
    )

    # performance schema info note
    if myvar.get('performance_schema', 'OFF') != 'ON':
        facts.append('Performance Schema is OFF (pfs memory not counted)')

    sections.append('. '.join(facts) + '.')

    if args.LENGTHY:
        sections.append(render_breakdown(myvar, mycalc))

    # When any memory-related recommendation fired, surface the top
    # contributors to peak memory so admins immediately see which knobs to
    # focus on instead of having to guess or re-run with --lengthy.
    if recommendations:
        rec_lines = [f'* {r}' for r in recommendations]
        top = [c for c in build_contributions(myvar, mycalc) if c[1] > 0][:5]
        if top:
            rec_lines.append('* Top contributors to peak memory (largest first):')
            for label, value, multiplied in top:
                rec_lines.append(f'  - {format_contribution(label, value, multiplied)}')
        sections.append('Recommendations:\n' + '\n'.join(rec_lines))

    msg = '\n\n'.join(sections)

    # perfdata
    perfdata = ''
    perfdata += lib.base.get_perfdata(
        'mysql_aria_pagecache_buffer_size',
        myvar.get('aria_pagecache_buffer_size', 0),
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_gcache_memory',
        mycalc['gcache_memory'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_innodb_buffer_pool_size',
        myvar['innodb_buffer_pool_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_innodb_log_buffer_size',
        myvar['innodb_log_buffer_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_join_buffer_size',
        myvar['join_buffer_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_key_buffer_size',
        myvar['key_buffer_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_allowed_packet',
        myvar['max_allowed_packet'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_connections',
        myvar['max_connections'],
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_heap_table_size',
        myvar['max_heap_table_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_pf_memory',
        mycalc['pf_memory'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_query_cache_size',
        myvar.get('query_cache_size', 0),
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_read_buffer_size',
        myvar['read_buffer_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_read_rnd_buffer_size',
        myvar['read_rnd_buffer_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_sort_buffer_size',
        myvar['sort_buffer_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_thread_stack',
        myvar['thread_stack'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_tmp_table_size',
        myvar['tmp_table_size'],
        uom='B',
        _min=0,
    )

    perfdata += lib.base.get_perfdata(
        'mysql_max_used_connections',
        mystat['Max_used_connections'],
        _min=0,
    )

    perfdata += lib.base.get_perfdata(
        'mysql_physical_memory',
        mycalc['physical_memory'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_other_process_memory',
        mycalc['other_process_memory'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_peak_memory',
        mycalc['max_peak_memory'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_tmp_table_size',
        mycalc['max_tmp_table_size'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_total_per_thread_buffers',
        mycalc['max_total_per_thread_buffers'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_max_used_memory',
        mycalc['max_used_memory'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_pct_max_physical_memory',
        mycalc['pct_max_physical_memory'],
        uom='%',
        warn=args.WARNING,
        crit=args.CRITICAL,
        _min=0,
        _max=100,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_pct_max_used_memory',
        mycalc['pct_max_used_memory'],
        uom='%',
        warn=args.WARNING,
        crit=args.CRITICAL,
        _min=0,
        _max=100,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_per_thread_buffers',
        mycalc['per_thread_buffers'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_server_buffers',
        mycalc['server_buffers'],
        uom='B',
        _min=0,
    )
    perfdata += lib.base.get_perfdata(
        'mysql_total_per_thread_buffers',
        mycalc['total_per_thread_buffers'],
        uom='B',
        _min=0,
    )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
