#!/bin/bash
# Title: cpu-load-calc.sh
# Description: Calculate a xymon client's cpu load (Run on Xymon Server periodically with cron)
# Dependency: Requires 'bc' package
# Last Change: 2018-05-22
# Recent Changes:-Updated awk search to look for [nproc] at the beginning of the line
#
# Overview:
#   For every host directory under ${xymon_hostdata_dir}, read the number of
#   processors from the line following the "[nproc]" marker in the most
#   recently modified hostdata file, multiply it by the warning/critical
#   multipliers, and write a "<node>.cfg" drop-in with a LOAD rule into
#   ${auto_load_dir}.
#
# Exit status:
#   0  run completed (individual nodes may have been skipped), or -h given
#   1  invalid option, missing dependency, or unusable directory

#=======================
# Customize Here
#=======================
# Warning and Critical Load Multipliers (num of procs * multiplier)
load_warn_multiplier=1.0
load_crit_multiplier=1.5

# Directory to save auto load thresholds
auto_load_dir="/etc/xymon/analysis.d/auto-cpuload.d"

# Xymon server's hostdata directory
xymon_hostdata_dir="/var/lib/xymon/hostdata"

# Xymon server's main analysis config file
xymon_analysis_cfg="/etc/xymon/analysis.cfg"
#=======================
# End of Customize
#=======================

#===============================
# Functions; Main starts after
#===============================

# Print the command line help text to stdout.
function show_usage {
  echo -e "\n####==== Xymon Client Auto Load Thresholds ====####"
  echo -e "\nDescripton: Calculate a xymon client's cpu load."
  echo -e "\n--Usage"
  echo -e "$0 => No arguments, configure with no verbosity."
  echo -e "$0 -v => Verbose output."
  echo -e "$0 -r => Refresh CPU load data (force hostdata update)."
  echo -e "$0 -h => Display usage."
}

# Force snapshots of hostdata.
# Arguments: $1 - node name as it appears under the hostdata directory
function force_hostdata {
  # Use node name passed as argument (kept local so it cannot leak into the main loop)
  local node_name="${1}"

  # Lie to Xymon that the node's cpu is green, then yellow, forcing a hostdata snapshot
  xymon 127.0.0.1 "status ${node_name}.cpu green $(date)"
  xymon 127.0.0.1 "status ${node_name}.cpu yellow $(date)"
}

# Print the path of the most recently modified regular file in a directory.
# Arguments: $1 - directory to search (non-recursive, dotfiles ignored)
# Outputs:   the path on stdout, or nothing when the directory has no files
function newest_file_in {
  local dir="${1}"
  local candidate
  local newest=""

  for candidate in "${dir}"/*; do
    [[ -f "${candidate}" ]] || continue
    # -nt is strict, so on equal mtimes the alphabetically first file wins,
    # which is the same pick 'ls -tr | tail -1' makes.
    if [[ -z "${newest}" || "${candidate}" -nt "${newest}" ]]; then
      newest="${candidate}"
    fi
  done

  if [[ -n "${newest}" ]]; then
    printf '%s\n' "${newest}"
  fi
}

#=======================
# Get Script Arguments
#=======================
# Parsed before the pre-run checks so that -h works even on a host where the
# Xymon directories are not set up yet.

# Reset POSIX variable in case it has been used previously in this shell
OPTIND=1

# By default, no verbose output
verbose_output="no"
refresh_cpus="no"

while getopts "hrv" opt; do
  case "${opt}" in
    h)
      # -h (help) argument
      show_usage
      exit 0
      ;;
    r)
      # -r (refresh cpus) argument
      refresh_cpus="yes"
      ;;
    v)
      # -v (verbose) argument
      verbose_output="yes"
      ;;
    *)
      # invalid argument: report failure to the caller instead of exiting 0
      show_usage
      exit 1
      ;;
  esac
done

#=======================
# Pre-Run Error Checking
#=======================
## Dependency Check ##
# 'command -v' is a shell builtin; test the command itself rather than
# comparing $? to 1, so any non-zero status counts as "not found".
if ! command -v bc &> /dev/null; then
  echo ">> Error! Dependent package 'bc' (byte code) not detected. Exiting..."
  exit 1
fi

## Does the Auto Load Directory exist?
if [[ ! -d "${auto_load_dir}" ]]; then
  echo ">> Error! The directory (${auto_load_dir}) does not exist or is not a directory. Exiting..."
  exit 1
fi

## Does the hostdata directory exist?
if [[ ! -d "${xymon_hostdata_dir}" ]]; then
  echo ">> Error! The directory (${xymon_hostdata_dir}) does not exist or is not a directory. Exiting..."
  exit 1
fi

## Write Access Check
# Use a uniquely named probe file so an existing file in the directory is
# never overwritten or removed by the check.
if write_probe="$(mktemp "${auto_load_dir}/.write-test.XXXXXX" 2> /dev/null)"; then
  rm -f -- "${write_probe}" &> /dev/null
else
  echo ">> Error! User '$(whoami)' does not have write access to ${auto_load_dir}! Exiting..."
  exit 1
fi

## Check if the auto_load_dir is included in main analysis config file
# Any grep failure (no match OR unreadable config file) produces the warning.
if ! grep -q -- "^directory ${auto_load_dir}" "${xymon_analysis_cfg}" &> /dev/null; then
  echo -e ">> Warning! Auto load directory (${auto_load_dir}) is not included in ${xymon_analysis_cfg}. Continuing, but auto CPU load settings will not take affect until 'directory ${auto_load_dir}' is added to ${xymon_analysis_cfg}.\n"
fi
#=======================
# End of Pre-Run Error Checking
#=======================

#=======================
# Main Program
#=======================
echo -e "== Xymon Client Auto Load Thresholds =="
echo -e "Load Warning Multiplier: ${load_warn_multiplier}"
echo -e "Load Critical Multiplier: ${load_crit_multiplier}"
echo -e "Saving configs to: ${auto_load_dir}"

# An empty hostdata directory must yield zero iterations, not a literal '*'
shopt -s nullglob

# For each node reporting host data (the trailing slash limits the glob to directories)
for node_dir in "${xymon_hostdata_dir}"/*/; do
  node_dir="${node_dir%/}"
  node="${node_dir##*/}"

  if [[ ${verbose_output} == "yes" ]]; then
    echo -e "\n>> Working on node: ${node}"
  fi

  if [[ ${refresh_cpus} == "yes" ]]; then
    if [[ ${verbose_output} == "yes" ]]; then
      echo -e "\n-> Refreshing hostdata..."
    fi
    # Force an update of hostdata
    force_hostdata "${node}"
  fi

  # Get the number of procs reported from node's most recent host data file.
  # The value is the line immediately after the first "[nproc]" marker.
  node_num_procs=""
  hostdata_file="$(newest_file_in "${node_dir}")"
  if [[ -n "${hostdata_file}" ]]; then
    node_num_procs="$(awk '/^\[nproc]/ { if ((getline line) > 0) print line; exit }' "${hostdata_file}")"
  fi

  # If node_num_procs is empty or not a number, move to the next node.
  # The pattern is anchored so values such as "4 cores" or "abc1" are rejected
  # instead of being handed to bc.
  if [[ ! ${node_num_procs} =~ ^[0-9]+$ ]]; then
    # Did not find 'nproc' in the host data file or no number from nproc returned
    if [[ ${verbose_output} == "yes" ]]; then
      echo "-> Warning! Could not find 'nproc' in ${node}'s host data file or no number returned. Skipping..."
    fi
    continue
  fi

  # Calculate the warning and critical load thresholds (normalize as a floating point with bc)
  load_warning="$(bc <<< "${node_num_procs} * ${load_warn_multiplier}")"
  load_critical="$(bc <<< "${node_num_procs} * ${load_crit_multiplier}")"

  # Never write a LOAD rule with missing values (e.g. a malformed multiplier)
  if [[ -z ${load_warning} || -z ${load_critical} ]]; then
    echo ">> Error! Could not calculate load thresholds for ${node}. Skipping..."
    continue
  fi

  if [[ ${verbose_output} == "yes" ]]; then
    echo -e "-> Number of Procs: ${node_num_procs}"
    echo -e "-> Warning at: ${load_warning}"
    echo -e "-> Critical at: ${load_critical}"
    echo -e "-> Creating node analysis drop in file..."
  fi

  # Create analysis drop in file with a single redirection so the file is
  # opened once and a failure to write is detected.
  node_cfg="${auto_load_dir}/${node}.cfg"
  if ! {
    echo "# ${node}'s CPU Load Thresholds (Warning Critical)"
    echo "HOST=${node}"
    echo " LOAD ${load_warning} ${load_critical}"
  } > "${node_cfg}"; then
    echo ">> Error! Could not write ${node_cfg}. Skipping..."
    continue
  fi
done

echo -e "\n== Auto Load Thresholds Complete =="
exit 0