我正在尝试为水冷设备设置检查:当流量降到 0 时,我希望触发警报。这是我的配置:
#/etc/nagios/custom/commands.cfg
## Chillwater
# chillwatersnmp1: SNMP v2c query of OID ...2.266 on the service's host.
# The returned value is tested against the -r extended regex (a number with a
# non-zero leading digit and six decimals); see the check_snmp help for the
# exact OK/CRITICAL semantics of -r.
define command {
    command_name    chillwatersnmp1
    command_line    $USER1$/check_snmp -H $HOSTNAME$ -P 2c -C techlook -o 1.3.6.1.4.1.3815.1.2.2.1.1.2.1.1.2.266 -r "[1-9][0-9]*\.[0-9]{6}"
}
# chillwatersnmp2: same regex-based test as chillwatersnmp1, but against
# OID ...2.279 (SNMP v2c, community "techlook").
define command {
    command_name    chillwatersnmp2
    command_line    $USER1$/check_snmp -H $HOSTNAME$ -P 2c -C techlook -o 1.3.6.1.4.1.3815.1.2.2.1.1.2.1.1.2.279 -r "[1-9][0-9]*\.[0-9]{6}"
}
# chillwatersnmp3: raise CRITICAL when OID ...2.279 reports a flow of exactly 0.
#
# Why this changed: the previous arguments "-s 0 -c0:120" could never flag zero
# flow. "-s 0" on its own reports OK when the value *is* "0", and the numeric
# range 0:120 also contains 0, so a stopped flow always looked healthy.
# "--invert-search" flips the string test so that a value of "0" is CRITICAL.
#
# NOTE(review): the old "-c0:120" upper bound was dropped together with the
# broken zero test. If an alarm above 120 is still wanted, add a separate
# numeric-threshold check -- confirm how your check_snmp version combines
# -s with -c before mixing them in one command.
define command {
    command_name    chillwatersnmp3
    command_line    $USER1$/check_snmp -H $HOSTNAME$ -P 2c -C techlook -o 1.3.6.1.4.1.3815.1.2.2.1.1.2.1.1.2.279 -s 0 --invert-search
}
#/etc/nagios/custom/hosts/chillwater.cfg
# Host group collecting the two chilled-water monitoring hosts.
define hostgroup {
    hostgroup_name  chillwater
    alias           chillwater
    members         cc-bb-mr.company.com,cc-bb-north.company.com
}
#/etc/nagios/custom/services/chillwater.cfg
# Service group for the chilled-water checks. It lists no members itself;
# the services below join it through their own "servicegroups" directive.
define servicegroup {
    servicegroup_name   chillwater
    alias               chillwater
}
# Bridge 6 heat-flow service on cc-bb-mr, checked with chillwatersnmp1.
# NOTE(review): contact group "chillwatergroup" is not defined in the visible
# configuration -- confirm it exists and that its contacts accept CRITICAL
# service notifications.
define service {
    use                     basic-service
    host_name               cc-bb-mr.company.com
    service_description     Bridge-6-heat-flow
    servicegroups           chillwater
    check_command           chillwatersnmp1
    contact_groups          chillwatergroup
}
# Bridge 1 heat-flow service on cc-bb-mr, checked with chillwatersnmp2.
define service {
    use                     basic-service
    host_name               cc-bb-mr.company.com
    service_description     Bridge-1-heat-flow
    servicegroups           chillwater
    check_command           chillwatersnmp2
    contact_groups          chillwatergroup
}
# HPC cooling heat-flow service on cc-bb-north, checked with chillwatersnmp3.
define service {
    use                     basic-service
    host_name               cc-bb-north.company.com
    service_description     HPC-cool-heat-flow
    servicegroups           chillwater
    check_command           chillwatersnmp3
    contact_groups          chillwatergroup
}
#/etc/nagios/templates.cfg
# Base service template (register 0: never instantiated as a real service).
define service{
    name                            generic-service ; Template name referenced by 'use'
    active_checks_enabled           1               ; Run active checks
    passive_checks_enabled          1               ; Accept passive check results
    parallelize_check               1               ; Parallelize active checks (disabling can hurt performance badly)
    obsess_over_service             1               ; Obsess over this service (if necessary)
    check_freshness                 0               ; Freshness checking is off by default
    notifications_enabled           1               ; Notifications are on
    event_handler_enabled           1               ; Event handler is on
    flap_detection_enabled          1               ; Flap detection is on
    flap_detection_options          o,c             ; States used for flap detection (o/c: OK and CRITICAL per Nagios docs)
    process_perf_data               1               ; Process performance data
    retain_status_information       1               ; Keep status information across restarts
    retain_nonstatus_information    1               ; Keep non-status information across restarts
    is_volatile                     0               ; Not a volatile service
    check_period                    24x7            ; Checks may run at any time
    max_check_attempts              3               ; Up to 3 attempts before the state becomes hard
    check_interval                  10              ; Normal check interval: 10 minutes
    retry_interval                  2               ; Retry every 2 minutes until a hard state is reached
    contact_groups                  admins          ; Default recipients: the 'admins' group
    notification_options            c,r             ; Notify on critical and recovery events only (no warning/unknown)
    notification_interval           60              ; Re-notify every hour while the problem persists
    notification_period             24x7            ; Notifications may be sent at any time
    register                        0               ; Template only -- do not register as a real service
}
# Template derived from generic-service. Compared with the parent it changes
# max_check_attempts (3 -> 2), retry_interval (2 -> 3) and
# notification_interval (60 -> 10); the remaining directives restate the
# parent's values.
define service{
    use                     generic-service
    name                    basic-service
    is_volatile             0
    check_period            24x7
    max_check_attempts      2
    check_interval          10
    retry_interval          3
    notification_options    c,r
    notification_interval   10
    notification_period     24x7
    register                0       ; template only, not a real service
}
我在这里遗漏了什么?我等了十分钟(即 notification_interval)等待警报出现,但 Web 控制台中什么也没有显示。
编辑:使用下面这些命令,我可以让 Nagios 识别出 CRITICAL(严重)状态:
# z_chillwatersnmp1: CRITICAL when OID ...2.266 returns exactly "0"
# (-s 0 combined with --invert-search).
# The opening "define command {" line was missing from this definition, which
# left the closing brace unbalanced; it is restored here.
define command {
    command_name    z_chillwatersnmp1
    command_line    $USER1$/check_snmp -H $HOSTNAME$ -C techlook -o 1.3.6.1.4.1.3815.1.2.2.1.1.2.1.1.2.266 -s 0 --invert-search
}
# z_chillwatersnmp2: inverted exact-match test for "0" on OID ...2.279.
# No -P option is given, so check_snmp's default SNMP version applies.
define command {
    command_name    z_chillwatersnmp2
    command_line    $USER1$/check_snmp -H $HOSTNAME$ -C techlook -o 1.3.6.1.4.1.3815.1.2.2.1.1.2.1.1.2.279 -s 0 --invert-search
}
# z_chillwatersnmp3: inverted exact-match test for "0" on OID ...2.279
# (same arguments as z_chillwatersnmp2, under a separate command name).
define command {
    command_name    z_chillwatersnmp3
    command_line    $USER1$/check_snmp -H $HOSTNAME$ -C techlook -o 1.3.6.1.4.1.3815.1.2.2.1.1.2.1.1.2.279 -s 0 --invert-search
}
上面的问题已经解决了。现在的问题是:为什么这些服务进入 CRITICAL 状态时,通知却没有发送?