summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix/vars
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix/vars')
-rw-r--r--roles/os_zabbix/vars/template_app_zabbix_agent.yml4
-rw-r--r--roles/os_zabbix/vars/template_app_zabbix_server.yml60
-rw-r--r--roles/os_zabbix/vars/template_docker.yml5
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml82
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml6
-rw-r--r--roles/os_zabbix/vars/template_performance_copilot.yml14
6 files changed, 136 insertions, 35 deletions
diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml
index 6349b6384..d636d4822 100644
--- a/roles/os_zabbix/vars/template_app_zabbix_agent.yml
+++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml
@@ -6,14 +6,14 @@ g_template_app_zabbix_agent:
applications:
- Zabbix agent
value_type: character
- zabbix_type: 0
+ zabbix_type: agent
- key: agent.ping
applications:
- Zabbix agent
description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check.
value_type: int
- zabbix_type: 0
+ zabbix_type: agent
ztriggers:
- name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes'
diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml
index aeec16254..43517113b 100644
--- a/roles/os_zabbix/vars/template_app_zabbix_server.yml
+++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml
@@ -8,7 +8,7 @@ g_template_app_zabbix_server:
description: A simple count of the number of partition creates output by the housekeeper script.
units: ''
value_type: int
- zabbix_type: 5
+ zabbix_type: internal
- key: housekeeper_drops
applications:
@@ -16,7 +16,7 @@ g_template_app_zabbix_server:
description: A simple count of the number of partition drops output by the housekeeper script.
units: ''
value_type: int
- zabbix_type: 5
+ zabbix_type: internal
- key: housekeeper_errors
applications:
@@ -24,7 +24,7 @@ g_template_app_zabbix_server:
description: A simple count of the number of errors output by the housekeeper script.
units: ''
value_type: int
- zabbix_type: 5
+ zabbix_type: internal
- key: housekeeper_total
applications:
@@ -33,7 +33,7 @@ g_template_app_zabbix_server:
script.
units: ''
value_type: int
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,alerter,avg,busy]
applications:
@@ -41,7 +41,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,configuration syncer,avg,busy]
applications:
@@ -49,7 +49,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,db watchdog,avg,busy]
applications:
@@ -57,7 +57,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,discoverer,avg,busy]
applications:
@@ -65,7 +65,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,escalator,avg,busy]
applications:
@@ -73,7 +73,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,history syncer,avg,busy]
applications:
@@ -81,7 +81,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,housekeeper,avg,busy]
applications:
@@ -89,7 +89,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,http poller,avg,busy]
applications:
@@ -97,7 +97,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,icmp pinger,avg,busy]
applications:
@@ -105,7 +105,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,ipmi poller,avg,busy]
applications:
@@ -113,7 +113,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,java poller,avg,busy]
applications:
@@ -121,7 +121,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,node watcher,avg,busy]
applications:
@@ -129,7 +129,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,poller,avg,busy]
applications:
@@ -137,7 +137,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,proxy poller,avg,busy]
applications:
@@ -145,7 +145,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,self-monitoring,avg,busy]
applications:
@@ -153,7 +153,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,snmp trapper,avg,busy]
applications:
@@ -161,7 +161,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,timer,avg,busy]
applications:
@@ -169,7 +169,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,trapper,avg,busy]
applications:
@@ -177,7 +177,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[process,unreachable poller,avg,busy]
applications:
@@ -185,7 +185,7 @@ g_template_app_zabbix_server:
description: ''
units: '%'
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[queue,10m]
applications:
@@ -193,7 +193,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: int
- zabbix_type: 5
+ zabbix_type: internal
interval: 600
- key: zabbix[queue]
@@ -202,7 +202,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: int
- zabbix_type: 5
+ zabbix_type: internal
interval: 600
- key: zabbix[rcache,buffer,pfree]
@@ -211,7 +211,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[wcache,history,pfree]
applications:
@@ -219,7 +219,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[wcache,text,pfree]
applications:
@@ -227,7 +227,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[wcache,trend,pfree]
applications:
@@ -235,7 +235,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
- key: zabbix[wcache,values]
applications:
@@ -243,7 +243,7 @@ g_template_app_zabbix_server:
description: ''
units: ''
value_type: float
- zabbix_type: 5
+ zabbix_type: internal
delta: 1 # speed per second
ztriggers:
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index 395e054de..bfabf50c5 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -7,6 +7,11 @@ g_template_docker:
- Docker Daemon
value_type: int
+ - key: docker.info_elapsed_ms
+ applications:
+ - Docker Daemon
+ value_type: int
+
- key: docker.storage.is_loopback
applications:
- Docker Storage
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 1de4fefbb..6defc4989 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -31,6 +31,78 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.etcd.create.success
+ description: Show number of successful create actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.create.fail
+ description: Show number of failed create actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.delete.success
+ description: Show number of successful delete actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.delete.fail
+ description: Show number of failed delete actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.get.success
+ description: Show number of successful get actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.get.fail
+ description: Show number of failed get actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.set.success
+ description: Show number of successful set actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.set.fail
+ description: Show number of failed set actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.update.success
+ description: Show number of successful update actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.update.fail
+ description: Show number of failed update actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.watchers
+ description: Show number of etcd watchers
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.ping
+ description: etcd ping
+ type: int
+ applications:
+ - Openshift Etcd
+
ztriggers:
- name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -56,3 +128,13 @@ g_template_openshift_master:
expression: '{Template Openshift Master:openshift.project.counter.last()}=0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: info
+
+ - name: 'Low number of etcd watchers on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: avg
+
+ - name: 'Etcd ping failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: high
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 3ae1500bc..aeeec4b8d 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -246,15 +246,15 @@ g_template_os_linux:
# CPU Utilization #
- name: 'CPU idle less than 5% on {HOST.NAME}'
- expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5'
+ expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
priority: average
description: 'CPU is less than 5% idle'
- name: 'CPU idle less than 10% on {HOST.NAME}'
- expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10'
+ expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
- priority: warn
+ priority: average
description: 'CPU is less than 10% idle'
dependencies:
- 'CPU idle less than 5% on {HOST.NAME}'
diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml
new file mode 100644
index 000000000..b62fa0228
--- /dev/null
+++ b/roles/os_zabbix/vars/template_performance_copilot.yml
@@ -0,0 +1,14 @@
+---
+g_template_performance_copilot:
+ name: Template Performance Copilot
+ zitems:
+ - key: pcp.ping
+ applications:
+ - Performance Copilot
+ value_type: int
+
+ ztriggers:
+ - name: 'pcp.ping failed on {HOST.NAME}'
+ expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc'
+ priority: average