In this post, I will give you a step by step guide on how to monitor a
remote Linux host and the various services running on the remote host.
How Does Nagios Work?
The following steps take place when Nagios (installed on the Nagios server) monitors a service (e.g., server load) on the remote host:
1) Nagios will execute the check_nrpe command on the nagios-server and request it to monitor the load on the remote host using the check_load command.
2) The check_nrpe on the nagios-server will contact the NRPE daemon on the remote host and request it to execute check_load on the remote host.
3) The results of the check_load command will be returned by the NRPE daemon to check_nrpe on the nagios-server.
1) Nagios will execute the check_nrpe command on the nagios-server and request it to monitor the load on the remote host using the check_load command.
2) The check_nrpe on the nagios-server will contact the NRPE daemon on the remote host and request it to execute check_load on the remote host.
3) The results of the check_load command will be returned by the NRPE daemon to check_nrpe on the nagios-server.
Nagios Server(check_nrpe)~>Remote host(NRPE daemon)~>check_load
Nagios Server(check_nrpe)<~Remote host(NRPE daemon)<~check_load
Nagios Server(check_nrpe)<~Remote host(NRPE daemon)<~check_load
Before we begin, we need to install some packages on the remote host for Nagios to work fine.
# yum install elinks gcc make gcc-c++
# yum -y install openssl-devel
# useradd nagios
# passwd nagios
# cd /usr/src/
# yum -y install openssl-devel
# useradd nagios
# passwd nagios
# cd /usr/src/
Then install the Nagios plugins and NRPE on the remote host.
Install Plug-in
# wget http://sourceforge.net/projects/nagiosplug/files/nagiosplug/1.4.15/nagios-plugins-1.4.15.tar.gz/download
# tar -zxvf nagios-plugins-1.4.15.tar.gz && cd nagios-plugins-1.4.15
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
# make
# make install
# chown nagios:nagios /usr/local/nagios
# chown -R nagios:nagios /usr/local/nagios/libexec
# cd ..
# tar -zxvf nagios-plugins-1.4.15.tar.gz && cd nagios-plugins-1.4.15
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
# make
# make install
# chown nagios:nagios /usr/local/nagios
# chown -R nagios:nagios /usr/local/nagios/libexec
# cd ..
Install NRPE
# wget http://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
# tar -zxvf nrpe-2.12.tar.gz && cd nrpe-2.12
# ./configure
# make all
# make install-plugin
# make install-daemon
# make install-daemon-config
# make install-xinetd
# tar -zxvf nrpe-2.12.tar.gz && cd nrpe-2.12
# ./configure
# make all
# make install-plugin
# make install-daemon
# make install-daemon-config
# make install-xinetd
Edit Xinetd NRPE entry
# vi /etc/xinetd.d/nrpe
only_from = 127.0.0.1 204.93.156.227
(replace 204.93.156.227 with the IP address of your Nagios monitoring server)
:wq (save and exit)
Edit services file entry
# vi /etc/services
nrpe 5666/tcp # Entry for NRPE daemon
:wq (save and exit)
Restart xinetd
# service xinetd restart
Verify whether NRPE is listening
# netstat -at | grep nrpe # output: tcp 0 0 *:nrpe *:* LISTEN
Verify that NRPE is functioning properly
# /usr/local/nagios/libexec/check_nrpe -H localhost
NRPE v2.12
Configuring Nagios monitoring server to monitor the remote host
# cd /usr/src
# wget http://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
# tar -zxvf nrpe-2.12.tar.gz && cd nrpe-2.12
# yum -y install openssl-devel
# yum install perl
# ./configure
# make all
# make install-plugin
# wget http://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
# tar -zxvf nrpe-2.12.tar.gz && cd nrpe-2.12
# yum -y install openssl-devel
# yum install perl
# ./configure
# make all
# make install-plugin
Create a command definition
# vi /home/nagios/public_html/etc/objects/commands.cfg
Add the following:
# NRPE CHECK COMMAND
# Command to use NRPE to check remote host systems
# Command to use NRPE to check remote host systems
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
Create configuration file for remote host
# cp -prf /home/nagios/www/etc/objects/localhost.cfg /home/nagios/www/etc/objects/remotehost.cfg
# vi /home/nagios/www/etc/objects/remotehost.cfg
Replace the values “host_name” “alias” “address” with the values that match your setup:
# vi /home/nagios/www/etc/objects/remotehost.cfg
Replace the values “host_name” “alias” “address” with the values that match your setup:
** The “host_name” you set in the “define host” section must match the “host_name” in every “define service” section **
# Define a host for the remote machine
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name alpha235
alias alpha235
address 62.75.215.12
}
# SERVICE DEFINITIONS
# Define a service to "ping" the remote machine
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Root Partition
check_command check_nrpe!check_disk
}
# Define a service to check the number of currently logged in users on the remote host.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Current Users
check_command check_nrpe!check_users
}
# Define a service to check the number of currently running processes on the remote host.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Total Processes
check_command check_nrpe!check_total_procs
}
# Define a service to check the load on the remote host.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Current Load
check_command check_nrpe!check_load
}
# Define a service to check SSH on the remote host.
# Disable notifications for this service by default, as not all users may have SSH enabled.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description SSH
check_command check_nrpe!check_ssh
notifications_enabled 0
}
# Define a service to check HTTP on the remote host.
# Disable notifications for this service by default, as not all users may have HTTP enabled.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description HTTP
check_command check_nrpe!check_http
notifications_enabled 0
}
# Define a host for the remote machine
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name alpha235
alias alpha235
address 62.75.215.12
}
# SERVICE DEFINITIONS
# Define a service to "ping" the remote machine
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Root Partition
check_command check_nrpe!check_disk
}
# Define a service to check the number of currently logged in users on the remote host.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Current Users
check_command check_nrpe!check_users
}
# Define a service to check the number of currently running processes on the remote host.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Total Processes
check_command check_nrpe!check_total_procs
}
# Define a service to check the load on the remote host.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description Current Load
check_command check_nrpe!check_load
}
# Define a service to check SSH on the remote host.
# Disable notifications for this service by default, as not all users may have SSH enabled.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description SSH
check_command check_nrpe!check_ssh
notifications_enabled 0
}
# Define a service to check HTTP on the remote host.
# Disable notifications for this service by default, as not all users may have HTTP enabled.
define service{
use generic-service ; Name of service template to use
host_name alpha235
service_description HTTP
check_command check_nrpe!check_http
notifications_enabled 0
}
Activate the remotehost.cfg template
# vi /usr/local/nagios/etc/nagios.cfg
Definitions for monitoring remote Linux machine
Definitions for monitoring remote Linux machine
cfg_file=/home/nagios/www/etc/objects/remotehost.cfg
Verify Nagios Configuration Files
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg Output : ...
...
Things look okay - No serious problems were detected during the pre-flight check
Verify whether nagios monitoring server can talk to the remote host
# /usr/local/nagios/libexec/check_nrpe -H 62.75.215.12
NRPE v2.12
Start nagios
# /home/nagios/www/bin/nagios -d /home/nagios/www/etc/nagios.cfg
Restart nagios (if already running)
# kill -HUP <nagios_pid>
# /usr/local/nagios/libexec/check_nrpe -H 62.75.215.12 -c check_load (To check the load of the remote server)
# /usr/local/nagios/libexec/check_nrpe -H 62.75.215.12 -c check_load (To check the load of the remote server)
OK - load average: 0.35, 0.30, 0.23|load1=0.350;15.000;30.000;0; load5=0.300;10.000;25.000;0; load15=0.230;5.000;20.000;0;
(Usage:
check_nrpe -H <host> [-n] [-u] [-p <port>] [-t
<timeout>] [-c <command>] [-a <arglist...>])
Comments and suggestions are welcome.
Sarath admin ROCKS .....!!!!!!
ReplyDeleteSarath admin ROCKS.....!!!!
ReplyDeleteNice Tutorial.......
I configured the Nagios server... How can I configure multiple switches and routers in a single file (Switch.cfg and Router.cfg)?