Differences
This shows you the differences between two versions of the page.
linux_wiki:xymon_cpu_load_threshold_calc [2016/11/27 00:08] billdozor |
linux_wiki:xymon_cpu_load_threshold_calc [2019/05/25 23:50] |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== Xymon Cpu Load Threshold Calc ====== | ||
- | |||
- | **General Information** | ||
- | |||
- | Posted to the Xymon Community at: https:// | ||
- | |||
- | Calculate (and set) a Xymon client' | ||
- | |||
- | This is meant to run on the Xymon Server periodically with cron. | ||
- | |||
- | It allows for far more useful load monitoring than arbitrarily setting a generic load or spending a lot of time editing config files for each system. | ||
- | |||
- | **Checklist** | ||
- | * Xymon server installed/ | ||
- | * Xymon clients checking in | ||
- | |||
- | ---- | ||
- | |||
- | ====== Installation ====== | ||
- | |||
- | Installation instructions. | ||
- | |||
- | ===== Client side ===== | ||
- | |||
- | No client modifications required. | ||
- | |||
- | ===== Server side ===== | ||
- | |||
- | - Install the ' | ||
- | - Enterprise Linux 6/7<code bash>yum install bc</ | ||
- | - Create the cpu-load-calc.sh script somewhere such as: / | ||
- | - See source below for contents | ||
- | - Edit the " | ||
- | - Edit multipliers if desired (number of procs * multiplier, for the warning and critical CPU load thresholds) | ||
- | - Create the auto load directory on the Xymon server | ||
- | - Default: / | ||
- | - Add the auto load directory to the Xymon main analysis.cfg file so it is included | ||
- | - Default: " | ||
- | - Set the default load in / | ||
- | - That way, a hostdata file is generated for a system that has not had its load auto calculated. (and you don't get alert emails/ | ||
- | - Setup the script to auto run via cron (-v is verbose output) | ||
- | - Example< | ||
- | #!/bin/bash | ||
- | # Description: | ||
- | |||
- | / | ||
- | |||
- | ---- | ||
- | |||
- | ====== The Script ====== | ||
- | |||
- | <hidden onHidden=" | ||
- | <code bash cpu-load-calc.sh> | ||
- | #!/bin/bash | ||
- | # Title: cpu-load-calc.sh | ||
- | # Description: | ||
- | # Dependency: Requires ' | ||
- | |||
- | # | ||
- | # Customize Here | ||
- | # | ||
- | |||
- | # Warning and Critical Load Multipliers (num of procs * multiplier) | ||
- | load_warn_multiplier=1.0 | ||
- | load_crit_multiplier=1.5 | ||
- | |||
- | # Directory to save auto load thresholds | ||
- | auto_load_dir="/ | ||
- | |||
- | # Xymon server' | ||
- | xymon_hostdata_dir="/ | ||
- | |||
- | # Xymon server' | ||
- | xymon_analysis_cfg="/ | ||
- | # | ||
- | # End of Customize | ||
- | # | ||
- | |||
- | # | ||
- | # Pre-Run Error Checking | ||
- | # | ||
- | ## Dependency Check ## | ||
- | which bc &> /dev/null | ||
- | if [[ $? -eq 1 ]]; then | ||
- | echo ">> | ||
- | exit 1 | ||
- | fi | ||
- | |||
- | ## Does the Auto Load Directory exist? | ||
- | if [[ ! -d ${auto_load_dir} ]]; then | ||
- | echo ">> | ||
- | exit | ||
- | fi | ||
- | |||
- | ## Write Access Check | ||
- | touch ${auto_load_dir}/ | ||
- | if [[ $? -eq 1 ]]; then | ||
- | echo ">> | ||
- | exit 1 | ||
- | else | ||
- | rm -f ${auto_load_dir}/ | ||
- | fi | ||
- | |||
- | ## Check if the auto_load_dir is included in main analysis config file | ||
- | grep " | ||
- | if [[ $? -eq 1 ]]; then | ||
- | echo -e ">> | ||
- | fi | ||
- | # | ||
- | # End of Pre-Run Error Checking | ||
- | # | ||
- | |||
- | # | ||
- | # Functions; Main starts after | ||
- | # | ||
- | |||
- | function show_usage | ||
- | { | ||
- | echo -e " | ||
- | echo -e " | ||
- | echo -e " | ||
- | echo -e " | ||
- | echo -e "$0 -v => Verbose output." | ||
- | echo -e "$0 -r => Refresh CPU load data (force hostdata update)." | ||
- | echo -e "$0 -h => Display usage." | ||
- | } | ||
- | |||
- | # Force snapshots of hostdata | ||
- | function force_hostdata | ||
- | { | ||
- | # Use node name passed as argument | ||
- | node_name=${1} | ||
- | |||
- | # Lie to Xymon that the node's cpu is yellow, forcing a hostdata snapshot | ||
- | xymon 127.0.0.1 " | ||
- | } | ||
- | |||
- | # | ||
- | # Get Script Arguments | ||
- | # | ||
- | # Reset POSIX variable in case it has been used previously in this shell | ||
- | OPTIND=1 | ||
- | |||
- | # By default, no verbose output | ||
- | verbose_output=" | ||
- | refresh_cpus=" | ||
- | |||
- | while getopts " | ||
- | case " | ||
- | h) # -h (help) argument | ||
- | show_usage | ||
- | exit 0 | ||
- | ;; | ||
- | r) # -r (refresh cpus) argument | ||
- | refresh_cpus=" | ||
- | ;; | ||
- | v) # -v (verbose) argument | ||
- | verbose_output=" | ||
- | ;; | ||
- | *) # invalid argument | ||
- | show_usage | ||
- | exit 0 | ||
- | ;; | ||
- | esac | ||
- | done | ||
- | |||
- | # | ||
- | # Main Program | ||
- | # | ||
- | echo -e "== Xymon Client Auto Load Thresholds ==" | ||
- | echo -e "Load Warning Multiplier: ${load_warn_multiplier}" | ||
- | echo -e "Load Critical Multiplier: ${load_crit_multiplier}" | ||
- | echo -e " | ||
- | |||
- | # For each node reporting host data | ||
- | for node in $(ls ${xymon_hostdata_dir}); | ||
- | |||
- | if [[ ${verbose_output} == " | ||
- | echo -e " | ||
- | fi | ||
- | |||
- | if [[ ${refresh_cpus} == " | ||
- | if [[ ${verbose_output} == " | ||
- | echo -e " | ||
- | fi | ||
- | # Force an update of hostdata | ||
- | force_hostdata ${node} | ||
- | fi | ||
- | |||
- | # Get the number of procs reported from node's most recent host data file | ||
- | node_num_procs=" | ||
- | | ||
- | # If node_num_procs is empty or not a number, move to the next node | ||
- | if [[ -z ${node_num_procs} || ! ${node_num_procs} =~ [0-9][0-9]* ]]; then | ||
- | # Did not find ' | ||
- | |||
- | if [[ ${verbose_output} == " | ||
- | echo "-> Warning! Could not find ' | ||
- | fi | ||
- | |||
- | continue | ||
- | fi | ||
- | |||
- | # Calculate the warning and critical load thresholds (normalize as a floating point with bc) | ||
- | load_warning=$(echo " | ||
- | load_critical=$(echo " | ||
- | |||
- | if [[ ${verbose_output} == " | ||
- | echo -e "-> Number of Procs: ${node_num_procs}" | ||
- | echo -e "-> Warning at: ${load_warning}" | ||
- | echo -e "-> Critical at: ${load_critical}" | ||
- | echo -e "-> Creating node analysis drop in file..." | ||
- | fi | ||
- | |||
- | # Create analysis drop in file | ||
- | echo "# ${node}' | ||
- | echo " | ||
- | echo " | ||
- | done | ||
- | |||
- | echo -e "\n== Auto Load Thresholds Complete ==" | ||
- | |||
- | exit 0 | ||
- | </ | ||
- | </ | ||
- | |||
- | ---- | ||