From 693be4802c2b3886b82681c5c1666b9f13d9ca36 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Fri, 21 Aug 2015 17:44:30 -0400 Subject: Updates for zbx ans module --- roles/os_zabbix/vars/main.yml | 1 + roles/os_zabbix/vars/template_heartbeat.yml | 12 ++ roles/os_zabbix/vars/template_host.yml | 27 +++++ roles/os_zabbix/vars/template_master.yml | 27 +++++ roles/os_zabbix/vars/template_node.yml | 27 +++++ roles/os_zabbix/vars/template_os_linux.yml | 173 ++++++++++++++++++++++++++++ roles/os_zabbix/vars/template_router.yml | 27 +++++ 7 files changed, 294 insertions(+) create mode 100644 roles/os_zabbix/vars/main.yml create mode 100644 roles/os_zabbix/vars/template_heartbeat.yml create mode 100644 roles/os_zabbix/vars/template_host.yml create mode 100644 roles/os_zabbix/vars/template_master.yml create mode 100644 roles/os_zabbix/vars/template_node.yml create mode 100644 roles/os_zabbix/vars/template_os_linux.yml create mode 100644 roles/os_zabbix/vars/template_router.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/main.yml b/roles/os_zabbix/vars/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/os_zabbix/vars/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml new file mode 100644 index 000000000..8389485d3 --- /dev/null +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -0,0 +1,12 @@ +--- +g_template_heartbeat: + name: Template Heartbeat + zitems: + - name: Heartbeat Ping + applications: + - Heartbeat + key: heartbeat.ping + ztriggers: + - description: 'Heartbeat.ping has failed on {HOST.NAME}' + expression: '{Template Heartbeat:heartbeat.ping.last()}<>0' + priority: avg diff --git a/roles/os_zabbix/vars/template_host.yml b/roles/os_zabbix/vars/template_host.yml new file mode 100644 index 000000000..e7cc667cb --- /dev/null +++ b/roles/os_zabbix/vars/template_host.yml @@ -0,0 +1,27 @@ +--- +g_template_host: + params: + name: Template Host + host: Template Host + groups: + - groupid: 1 # FIXME (not real) + output: extend + search: + name: Template Host + zitems: + - name: Host Ping + hostid: + key_: host.ping + type: 2 + value_type: 0 + output: extend + search: + key_: host.ping + ztriggers: + - description: 'Host ping has failed on {HOST.NAME}' + expression: '{Template Host:host.ping.last()}<>0' + priority: 3 + searchWildcardsEnabled: True + search: + description: 'Host ping has failed on*' + expandExpression: True diff --git a/roles/os_zabbix/vars/template_master.yml b/roles/os_zabbix/vars/template_master.yml new file mode 100644 index 000000000..5f9b41a4f --- /dev/null +++ b/roles/os_zabbix/vars/template_master.yml @@ -0,0 +1,27 @@ +--- +g_template_master: + params: + name: Template Master + host: Template Master + groups: + - groupid: 1 # FIXME (not real) + output: extend + search: + name: Template Master + zitems: + - name: Master Etcd Ping + hostid: + key_: master.etcd.ping + type: 2 + value_type: 0 + output: extend + search: + key_: master.etcd.ping + ztriggers: + - description: 'Master Etcd ping has failed on {HOST.NAME}' + expression: '{Template Master:master.etcd.ping.last()}<>0' + priority: 3 + searchWildcardsEnabled: True + search: + description: 'Master Etcd ping has failed on*' + expandExpression: True diff --git a/roles/os_zabbix/vars/template_node.yml b/roles/os_zabbix/vars/template_node.yml new file mode 100644 index 000000000..98c343a24 --- /dev/null +++ b/roles/os_zabbix/vars/template_node.yml @@ -0,0 +1,27 @@ +--- +g_template_node: + params: + name: Template Node + host: Template Node + groups: + - groupid: 1 # FIXME (not real) + output: extend + search: + name: Template Node + zitems: + - name: Kubelet Ping + hostid: + key_: kubelet.ping + type: 2 + value_type: 0 + output: extend + search: + key_: kubelet.ping + ztriggers: + - description: 'Kubelet ping has failed on {HOST.NAME}' + expression: '{Template Node:kubelet.ping.last()}<>0' + priority: 3 + searchWildcardsEnabled: True + search: + description: 'Kubelet ping has failed on*' + expandExpression: True diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml new file mode 100644 index 000000000..1c9d10bb0 --- /dev/null +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -0,0 +1,173 @@ +--- +g_template_os_linux: + name: Template OS Linux + zitems: + - key: kernel.uname.sysname + applications: + - Kernel + value_type: string + + - key: kernel.all.cpu.wait.total + applications: + - Kernel + value_type: int + + - key: kernel.all.cpu.irq.hard + applications: + - Kernel + value_type: int + + - key: kernel.all.cpu.idle + applications: + - Kernel + value_type: int + + - key: kernel.uname.distro + applications: + - Kernel + value_type: string + + - key: kernel.uname.nodename + applications: + - Kernel + value_type: string + + - key: kernel.all.cpu.irq.soft + applications: + - Kernel + value_type: int + + - key: kernel.all.load.15_minute + applications: + - Kernel + value_type: float + + - key: kernel.all.cpu.sys + applications: + - Kernel + value_type: int + + - key: kernel.all.load.5_minute + applications: + - Kernel + value_type: float + + - key: mem.freemem + applications: + - Memory + value_type: int + + - key: kernel.all.cpu.nice + applications: + - Kernel + value_type: int + + - key: mem.util.bufmem + applications: + - Memory + value_type: int + + - key: swap.used + applications: + - Memory + value_type: int + + - key: kernel.all.load.1_minute + applications: + - Kernel + value_type: float + + - key: kernel.uname.version + applications: + - Kernel + value_type: string + + - key: swap.length + applications: + - Memory + value_type: int + + - key: mem.physmem + applications: + - Memory + value_type: int + + - key: kernel.all.uptime + applications: + - Kernel + value_type: int + + - key: swap.free + applications: + - Memory + value_type: int + + - key: mem.util.used + applications: + - Memory + value_type: int + + - key: kernel.all.cpu.user + applications: + - Kernel + value_type: int + + - key: kernel.uname.machine + applications: + - Kernel + value_type: string + + - key: hinv.ncpu + applications: + - Kernel + value_type: int + + - key: mem.util.cached + applications: + - Memory + value_type: int + + - key: kernel.all.cpu.steal + applications: + - Kernel + value_type: int + + - key: kernel.all.pswitch + applications: + - Kernel + value_type: int + + - key: kernel.uname.release + applications: + - Kernel + value_type: string + + - key: proc.nprocs + applications: + - Kernel + value_type: int + + - key: filesys.avail + applications: + - Disk + value_type: int + + - key: filesys.capacity + applications: + - Disk + value_type: int + + - key: filesys.free + applications: + - Disk + value_type: int + + - key: filesys.full + applications: + - Disk + value_type: float + + - key: filesys.used + applications: + - Disk + value_type: float diff --git a/roles/os_zabbix/vars/template_router.yml b/roles/os_zabbix/vars/template_router.yml new file mode 100644 index 000000000..4dae7da1e --- /dev/null +++ b/roles/os_zabbix/vars/template_router.yml @@ -0,0 +1,27 @@ +--- +g_template_router: + params: + name: Template Router + host: Template Router + groups: + - groupid: 1 # FIXME (not real) + output: extend + search: + name: Template Router + zitems: + - name: Router Backends down + hostid: + key_: router.backends.down + type: 2 + value_type: 0 + output: extend + search: + key_: router.backends.down + ztriggers: + - description: 'Number of router backends down on {HOST.NAME}' + expression: '{Template Router:router.backends.down.last()}<>0' + priority: 3 + searchWildcardsEnabled: True + search: + description: 'Number of router backends down on {HOST.NAME}' + expandExpression: True -- cgit v1.2.3 From 87d786993529b2f3a790fc7a3a83a93724f778ff Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Thu, 27 Aug 2015 16:47:01 -0400 Subject: update heartbeat check --- roles/os_zabbix/vars/template_heartbeat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml index 8389485d3..3d0f3d51a 100644 --- a/roles/os_zabbix/vars/template_heartbeat.yml +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -8,5 +8,5 @@ g_template_heartbeat: key: heartbeat.ping ztriggers: - description: 'Heartbeat.ping has failed on {HOST.NAME}' - expression: '{Template Heartbeat:heartbeat.ping.last()}<>0' + expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1' priority: avg -- cgit v1.2.3 From e13d5f1217129234b0c5e6c99b4a7d5ad7b87f6b Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 28 Aug 2015 16:34:56 -0400 Subject: added filesys.full --- roles/os_zabbix/vars/template_os_linux.yml | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 1c9d10bb0..9b56d20d4 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -147,27 +147,12 @@ g_template_os_linux: - Kernel value_type: int - - key: filesys.avail - applications: - - Disk - value_type: int - - - key: filesys.capacity - applications: - - Disk - value_type: int - - - key: filesys.free - applications: - - Disk - value_type: int - - - key: filesys.full + - key: filesys.full.xvda2 applications: - Disk value_type: float - - key: filesys.used + - key: filesys.full.xvda3 applications: - Disk value_type: float -- cgit v1.2.3 From 1f52ea8c4e2f8cfce51e98cb3614c61f0d78ec3e Mon Sep 17 00:00:00 2001 From: Thomas Wiest Date: Fri, 28 Aug 2015 18:03:59 -0400 Subject: added docker zabbix template, removed unused / old templates so they don't confuse other people. --- roles/os_zabbix/vars/template_docker.yml | 83 ++++++++++++++++++++++++++++++++ roles/os_zabbix/vars/template_host.yml | 27 ----------- roles/os_zabbix/vars/template_master.yml | 27 ----------- roles/os_zabbix/vars/template_node.yml | 27 ----------- roles/os_zabbix/vars/template_router.yml | 27 ----------- 5 files changed, 83 insertions(+), 108 deletions(-) create mode 100644 roles/os_zabbix/vars/template_docker.yml delete mode 100644 roles/os_zabbix/vars/template_host.yml delete mode 100644 roles/os_zabbix/vars/template_master.yml delete mode 100644 roles/os_zabbix/vars/template_node.yml delete mode 100644 roles/os_zabbix/vars/template_router.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml new file mode 100644 index 000000000..eab497269 --- /dev/null +++ b/roles/os_zabbix/vars/template_docker.yml @@ -0,0 +1,83 @@ +--- +g_template_docker: + name: Template Docker + zitems: + - key: docker.ping + applications: + - Docker Daemon + value_type: int + + - key: docker.storage.is_loopback + applications: + - Docker Storage + value_type: int + + - key: docker.storage.data.space.total + applications: + - Docker Storage + value_type: float + + - key: docker.storage.data.space.used + applications: + - Docker Storage + value_type: float + + - key: docker.storage.data.space.available + applications: + - Docker Storage + value_type: float + + - key: docker.storage.data.space.percent_available + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.total + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.used + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.available + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.percent_available + applications: + - Docker Storage + value_type: float + ztriggers: + - description: 'docker.ping failed on {HOST.NAME}' + expression: '{Template Docker:docker.ping.max(#3)}<1' + priority: high + + - description: 'Docker storage is using LOOPBACK on {HOST.NAME}' + expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' + priority: high + + - description: 'Critically low docker storage data space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB + priority: high + + - description: 'Critically low docker storage metadata space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.1' # < 5% or < 100MB + priority: high + + # Put triggers that depend on other triggers here (deps must be created first) + - description: 'Low docker storage data space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB + dependencies: + - 'Critically low docker storage data space on {HOST.NAME}' + priority: average + + - description: 'Low docker storage metadata space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.2' # < 10% or < 200MB + dependencies: + - 'Critically low docker storage metadata space on {HOST.NAME}' + priority: average + diff --git a/roles/os_zabbix/vars/template_host.yml b/roles/os_zabbix/vars/template_host.yml deleted file mode 100644 index e7cc667cb..000000000 --- a/roles/os_zabbix/vars/template_host.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -g_template_host: - params: - name: Template Host - host: Template Host - groups: - - groupid: 1 # FIXME (not real) - output: extend - search: - name: Template Host - zitems: - - name: Host Ping - hostid: - key_: host.ping - type: 2 - value_type: 0 - output: extend - search: - key_: host.ping - ztriggers: - - description: 'Host ping has failed on {HOST.NAME}' - expression: '{Template Host:host.ping.last()}<>0' - priority: 3 - searchWildcardsEnabled: True - search: - description: 'Host ping has failed on*' - expandExpression: True diff --git a/roles/os_zabbix/vars/template_master.yml b/roles/os_zabbix/vars/template_master.yml deleted file mode 100644 index 5f9b41a4f..000000000 --- a/roles/os_zabbix/vars/template_master.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -g_template_master: - params: - name: Template Master - host: Template Master - groups: - - groupid: 1 # FIXME (not real) - output: extend - search: - name: Template Master - zitems: - - name: Master Etcd Ping - hostid: - key_: master.etcd.ping - type: 2 - value_type: 0 - output: extend - search: - key_: master.etcd.ping - ztriggers: - - description: 'Master Etcd ping has failed on {HOST.NAME}' - expression: '{Template Master:master.etcd.ping.last()}<>0' - priority: 3 - searchWildcardsEnabled: True - search: - description: 'Master Etcd ping has failed on*' - expandExpression: True diff --git a/roles/os_zabbix/vars/template_node.yml b/roles/os_zabbix/vars/template_node.yml deleted file mode 100644 index 98c343a24..000000000 --- a/roles/os_zabbix/vars/template_node.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -g_template_node: - params: - name: Template Node - host: Template Node - groups: - - groupid: 1 # FIXME (not real) - output: extend - search: - name: Template Node - zitems: - - name: Kubelet Ping - hostid: - key_: kubelet.ping - type: 2 - value_type: 0 - output: extend - search: - key_: kubelet.ping - ztriggers: - - description: 'Kubelet ping has failed on {HOST.NAME}' - expression: '{Template Node:kubelet.ping.last()}<>0' - priority: 3 - searchWildcardsEnabled: True - search: - description: 'Kubelet ping has failed on*' - expandExpression: True diff --git a/roles/os_zabbix/vars/template_router.yml b/roles/os_zabbix/vars/template_router.yml deleted file mode 100644 index 4dae7da1e..000000000 --- a/roles/os_zabbix/vars/template_router.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -g_template_router: - params: - name: Template Router - host: Template Router - groups: - - groupid: 1 # FIXME (not real) - output: extend - search: - name: Template Router - zitems: - - name: Router Backends down - hostid: - key_: router.backends.down - type: 2 - value_type: 0 - output: extend - search: - key_: router.backends.down - ztriggers: - - description: 'Number of router backends down on {HOST.NAME}' - expression: '{Template Router:router.backends.down.last()}<>0' - priority: 3 - searchWildcardsEnabled: True - search: - description: 'Number of router backends down on {HOST.NAME}' - expandExpression: True -- cgit v1.2.3 From 3acccb3e51968744cbe9119fdbf1897739f3a94e Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Mon, 31 Aug 2015 15:06:14 -0400 Subject: Adding create app --- roles/os_zabbix/vars/template_openshift_master.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 roles/os_zabbix/vars/template_openshift_master.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml new file mode 100644 index 000000000..c4e4d1c94 --- /dev/null +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -0,0 +1,12 @@ +--- +g_template_openshift_master: + name: Template Openshift Master + zitems: + - name: create_app + applications: + - Openshift Master + key: create_app + ztriggers: + - description: 'Application creation has failed on {HOST.NAME}' + expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' + priority: avg -- cgit v1.2.3 From 3f6b8e5ada944142644ab053f6505bd7aacf59db Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Mon, 31 Aug 2015 13:05:07 -0400 Subject: added triggers for disk space --- roles/os_zabbix/vars/template_os_linux.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 9b56d20d4..95e15c6e8 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -156,3 +156,20 @@ g_template_os_linux: applications: - Disk value_type: float + + ztriggers: + - description: 'Filesystem: / has less than 10% free on {HOST.NAME}' + expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' + priority: warn + + - description: 'Filesystem: / has less than 5% free on {HOST.NAME}' + expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' + priority: high + + - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}' + expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' + priority: warn + + - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' + expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' + priority: high -- cgit v1.2.3 From f2a97b901b5adf6cb8a4875e0b296c5a0d509741 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Mon, 31 Aug 2015 16:07:22 -0400 Subject: added monitoring triggers --- roles/os_zabbix/vars/template_os_linux.yml | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 95e15c6e8..7bc8ddc5d 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -173,3 +173,11 @@ g_template_os_linux: - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' priority: high + + - description: 'Too many TOTAL processes on {HOST.NAME}' + expression: '{Template OS Linux:proc.nprocs.last()}>5000' + priority: warn + + - description: 'Lack of available memory on {HOST.NAME}' + expression: '{Template OS Linux:mem.freemem.last()}<3000' + priority: warn -- cgit v1.2.3 From 7d7e434421b0cd2283c95b457de9855c9eda29dc Mon Sep 17 00:00:00 2001 From: Thomas Wiest Date: Wed, 2 Sep 2015 13:54:36 -0400 Subject: relaxed the docker metadata triggers to be more in line with what docker-storage-setup allocates for metadata. --- roles/os_zabbix/vars/template_docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index eab497269..6a28f52b8 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -65,7 +65,7 @@ g_template_docker: priority: high - description: 'Critically low docker storage metadata space on {HOST.NAME}' - expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.1' # < 5% or < 100MB + expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB priority: high # Put triggers that depend on other triggers here (deps must be created first) @@ -76,7 +76,7 @@ g_template_docker: priority: average - description: 'Low docker storage metadata space on {HOST.NAME}' - expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.2' # < 10% or < 200MB + expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB dependencies: - 'Critically low docker storage metadata space on {HOST.NAME}' priority: average -- cgit v1.2.3 From 6760bf1eaaf8e37e4114a6da242285b601a4b125 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Thu, 3 Sep 2015 15:30:42 -0400 Subject: added url to the heartbeat check --- roles/os_zabbix/vars/template_heartbeat.yml | 1 + 1 file changed, 1 insertion(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml index 3d0f3d51a..798377cd9 100644 --- a/roles/os_zabbix/vars/template_heartbeat.yml +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -10,3 +10,4 @@ g_template_heartbeat: - description: 'Heartbeat.ping has failed on {HOST.NAME}' expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1' priority: avg + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc' -- cgit v1.2.3 From b75b0dd8fec0787c2240b56f94f9940c11592098 Mon Sep 17 00:00:00 2001 From: Marek Mahut Date: Fri, 4 Sep 2015 16:49:27 +0200 Subject: Adding url to triggers --- roles/os_zabbix/vars/template_docker.yml | 6 ++++++ roles/os_zabbix/vars/template_openshift_master.yml | 1 + roles/os_zabbix/vars/template_os_linux.yml | 6 ++++++ 3 files changed, 13 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index 6a28f52b8..a1cd3519e 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -54,29 +54,35 @@ g_template_docker: ztriggers: - description: 'docker.ping failed on {HOST.NAME}' expression: '{Template Docker:docker.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' priority: high - description: 'Docker storage is using LOOPBACK on {HOST.NAME}' expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' priority: high - description: 'Critically low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high - description: 'Critically low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high # Put triggers that depend on other triggers here (deps must be created first) - description: 'Low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: - 'Critically low docker storage data space on {HOST.NAME}' priority: average - description: 'Low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: - 'Critically low docker storage metadata space on {HOST.NAME}' priority: average diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index c4e4d1c94..d2c1365b0 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -9,4 +9,5 @@ g_template_openshift_master: ztriggers: - description: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 7bc8ddc5d..885dc893b 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -160,24 +160,30 @@ g_template_os_linux: ztriggers: - description: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - description: 'Filesystem: / has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - description: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' priority: warn - description: 'Lack of available memory on {HOST.NAME}' expression: '{Template OS Linux:mem.freemem.last()}<3000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn -- cgit v1.2.3 From 48e5718f42a7cc0df2b11cecd1b9bdffd71806c6 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 4 Sep 2015 15:53:13 -0400 Subject: added mem.util.available --- roles/os_zabbix/vars/template_os_linux.yml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 7bc8ddc5d..91b856fcb 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -102,6 +102,11 @@ g_template_os_linux: - Memory value_type: int + - key: mem.util.available + applications: + - Memory + value_type: int + - key: mem.util.used applications: - Memory -- cgit v1.2.3 From 4824691f00c04936da74ddfbbeb6e521ddb86d98 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Tue, 8 Sep 2015 15:55:45 -0400 Subject: Adding desc, multiplier, and units to zabbix item --- roles/os_zabbix/vars/template_os_linux.yml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 7c446cd85..fad6af807 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -111,6 +111,9 @@ g_template_os_linux: applications: - Memory value_type: int + description: used memory + multiplier: 1024 + units: B - key: kernel.all.cpu.user applications: @@ -131,6 +134,9 @@ g_template_os_linux: applications: - Memory value_type: int + description: cached memory + multiplier: 1024 + units: B - key: kernel.all.cpu.steal applications: -- cgit v1.2.3 From 2f0bbb5781b270d88bccd5fcbe90723f9b3a5930 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Thu, 10 Sep 2015 11:26:22 -0400 Subject: updated triggers and items to have better descriptions and multipliers --- roles/os_zabbix/vars/template_docker.yml | 12 +- roles/os_zabbix/vars/template_heartbeat.yml | 2 +- roles/os_zabbix/vars/template_openshift_master.yml | 2 +- roles/os_zabbix/vars/template_os_linux.yml | 122 ++++++++++++--------- 4 files changed, 81 insertions(+), 57 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index a1cd3519e..395e054de 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -52,35 +52,35 @@ g_template_docker: - Docker Storage value_type: float ztriggers: - - description: 'docker.ping failed on {HOST.NAME}' + - name: 'docker.ping failed on {HOST.NAME}' expression: '{Template Docker:docker.ping.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' priority: high - - description: 'Docker storage is using LOOPBACK on {HOST.NAME}' + - name: 'Docker storage is using LOOPBACK on {HOST.NAME}' expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' priority: high - - description: 'Critically low docker storage data space on {HOST.NAME}' + - name: 'Critically low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high - - description: 'Critically low docker storage metadata space on {HOST.NAME}' + - name: 'Critically low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high # Put triggers that depend on other triggers here (deps must be created first) - - description: 'Low docker storage data space on {HOST.NAME}' + - name: 'Low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: - 'Critically low docker storage data space on {HOST.NAME}' priority: average - - description: 'Low docker storage metadata space on {HOST.NAME}' + - name: 'Low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml index 798377cd9..8dbe0d0d6 100644 --- a/roles/os_zabbix/vars/template_heartbeat.yml +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -7,7 +7,7 @@ g_template_heartbeat: - Heartbeat key: heartbeat.ping ztriggers: - - description: 'Heartbeat.ping has failed on {HOST.NAME}' + - name: 'Heartbeat.ping has failed on {HOST.NAME}' expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1' priority: avg url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index d2c1365b0..728423ac1 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -7,7 +7,7 @@ g_template_openshift_master: - Openshift Master key: create_app ztriggers: - - description: 'Application creation has failed on {HOST.NAME}' + - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index fad6af807..3173c79b2 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -52,112 +52,135 @@ g_template_os_linux: - Kernel value_type: float - - key: mem.freemem + - key: kernel.all.cpu.nice applications: - - Memory + - Kernel value_type: int - - key: kernel.all.cpu.nice + - key: kernel.all.load.1_minute applications: - Kernel - value_type: int + value_type: float - - key: mem.util.bufmem + - key: kernel.uname.version applications: - - Memory - value_type: int + - Kernel + value_type: string - - key: swap.used + - key: kernel.all.uptime applications: - - Memory + - Kernel value_type: int - - key: kernel.all.load.1_minute + - key: kernel.all.cpu.user applications: - Kernel - value_type: float + value_type: int - - key: kernel.uname.version + - key: kernel.uname.machine applications: - Kernel value_type: string - - key: swap.length + - key: hinv.ncpu applications: - - Memory + - Kernel value_type: int - - key: mem.physmem + - key: kernel.all.cpu.steal applications: - - Memory + - Kernel value_type: int - - key: kernel.all.uptime + - key: kernel.all.pswitch applications: - Kernel value_type: int - - key: swap.free + - key: kernel.uname.release applications: - - Memory - value_type: int + - Kernel + value_type: string - - key: mem.util.available + - key: proc.nprocs applications: - - Memory + - Kernel value_type: int - - key: mem.util.used + # Memory Items + - key: mem.freemem applications: - Memory value_type: int - description: used memory + description: "PCP: free system memory metric from /proc/meminfo" multiplier: 1024 units: B - - key: kernel.all.cpu.user + - key: mem.util.bufmem applications: - - Kernel + - Memory value_type: int + description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.uname.machine + - key: swap.used applications: - - Kernel - value_type: string + - Memory + value_type: int + description: "PCP: swap used metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: hinv.ncpu + - key: swap.length applications: - - Kernel + - Memory value_type: int + description: "PCP: total swap available metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: mem.util.cached + - key: mem.physmem applications: - Memory value_type: int - description: cached memory + description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM." multiplier: 1024 units: B - - key: kernel.all.cpu.steal + - key: swap.free applications: - - Kernel + - Memory value_type: int + description: "PCP: swap free metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.all.pswitch + - key: mem.util.available applications: - - Kernel + - Memory value_type: int + description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.uname.release + - key: mem.util.used applications: - - Kernel - value_type: string + - Memory + value_type: int + description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: proc.nprocs + - key: mem.util.cached applications: - - Kernel + - Memory value_type: int + description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo" + multiplier: 1024 + units: B + # Disk items - key: filesys.full.xvda2 applications: - Disk @@ -169,32 +192,33 @@ g_template_os_linux: value_type: float ztriggers: - - description: 'Filesystem: / has less than 10% free on {HOST.NAME}' + - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - description: 'Filesystem: / has less than 5% free on {HOST.NAME}' + - name: 'Filesystem: / has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}' + - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' + - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - - description: 'Too many TOTAL processes on {HOST.NAME}' + - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' priority: warn - - description: 'Lack of available memory on {HOST.NAME}' - expression: '{Template OS Linux:mem.freemem.last()}<3000' + - name: 'Lack of available memory on {HOST.NAME}' + expression: '{Template OS Linux:mem.freemem.last()}<30720000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn + description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024' -- cgit v1.2.3 From 31035f3ee85ceadb74804c406788059e92db9891 Mon Sep 17 00:00:00 2001 From: Thomas Wiest Date: Thu, 17 Sep 2015 11:17:24 -0400 Subject: Added zabbix item openshift.master.process.exist and trigger to watch it. --- roles/os_zabbix/vars/template_openshift_master.yml | 17 +++++++++++++++++ roles/os_zabbix/vars/template_openshift_node.yml | 20 ++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 roles/os_zabbix/vars/template_openshift_node.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 728423ac1..c71e07910 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -6,8 +6,25 @@ g_template_openshift_master: applications: - Openshift Master key: create_app + + - key: openshift.master.process.count + description: Shows number of master processes running + type: int + applications: + - Openshift Master + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg + + - name: 'Openshift Master process not running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Too many Openshift Master processes running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml new file mode 100644 index 000000000..36f9cc4a3 --- /dev/null +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -0,0 +1,20 @@ +--- +g_template_openshift_node: + name: Template Openshift Node + zitems: + - key: openshift.node.process.count + description: Shows number of OpenShift Node processes running + type: int + applications: + - Openshift Node + + ztriggers: + - name: 'Openshift Node process not running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'Too many Openshift Node processes running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' + priority: high -- cgit v1.2.3 From 19f91f7b7844ec3c89d8ee0e6a6c08655e897266 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 25 Sep 2015 12:05:03 -0400 Subject: added support for dynaic keys and items. added dynamic filesystems to zabbix --- roles/os_zabbix/vars/template_os_linux.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 3173c79b2..84a7740b0 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -191,6 +191,24 @@ g_template_os_linux: - Disk value_type: float + + zdiscoveryrules: + - name: disc.filesys + key: disc.filesys + lifetime: 1 + template_name: Template OS Linux + description: "Dynamically register the filesystems" + + zitemprototypes: + - discoveryrule_key: disc.filesys + template_name: Template OS Linux + name: "disc.filesys.full.{#OSO_FILESYS}" + key: "disc.filesys.full[{#OSO_FILESYS}]" + value_type: float + description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full" + applications: + - Disk + ztriggers: - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' -- cgit v1.2.3 From 3a15b711e8d47e7365e361534533c28e234e87d3 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Mon, 28 Sep 2015 12:03:43 -0400 Subject: Trigger prototype support --- roles/os_zabbix/vars/template_os_linux.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 84a7740b0..3c29c5d16 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -209,6 +209,17 @@ g_template_os_linux: applications: - Disk + ztriggerprototypes: + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + + - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: high + ztriggers: - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' -- cgit v1.2.3 From e97fb3cb60863dc2733df71cdea3c2f3fb8ba76d Mon Sep 17 00:00:00 2001 From: Max Whittingham Date: Wed, 30 Sep 2015 08:39:15 -0500 Subject: Zabbix check and trigger for user count --- roles/os_zabbix/vars/template_openshift_master.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index c71e07910..f82771d59 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,6 +13,12 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.user.count + description: Shows number of users in a cluster + type: int + applications: + - Openshift Master + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -28,3 +34,9 @@ g_template_openshift_master: expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high + + - name: 'Number of users for Openshift Master on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.user.count.last(,3)}>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info + -- cgit v1.2.3 From 0c24b639653d01ce4a0a730f9c579cfc30ec853b Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Wed, 30 Sep 2015 14:28:52 -0400 Subject: Updating these to the correct type --- roles/os_zabbix/vars/template_os_linux.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 3c29c5d16..fe72f003b 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -10,17 +10,17 @@ g_template_os_linux: - key: kernel.all.cpu.wait.total applications: - Kernel - value_type: int + value_type: float - key: kernel.all.cpu.irq.hard applications: - Kernel - value_type: int + value_type: float - key: kernel.all.cpu.idle applications: - Kernel - value_type: int + value_type: float - key: kernel.uname.distro applications: @@ -35,7 +35,7 @@ g_template_os_linux: - key: kernel.all.cpu.irq.soft applications: - Kernel - value_type: int + value_type: float - key: kernel.all.load.15_minute applications: @@ -45,7 +45,7 @@ g_template_os_linux: - key: kernel.all.cpu.sys applications: - Kernel - value_type: int + value_type: float - key: kernel.all.load.5_minute applications: @@ -55,7 +55,7 @@ g_template_os_linux: - key: kernel.all.cpu.nice applications: - Kernel - value_type: int + value_type: float - key: kernel.all.load.1_minute applications: @@ -75,7 +75,7 @@ g_template_os_linux: - key: kernel.all.cpu.user applications: - Kernel - value_type: int + value_type: float - key: kernel.uname.machine applications: @@ -90,7 +90,7 @@ g_template_os_linux: - key: kernel.all.cpu.steal applications: - Kernel - value_type: int + value_type: float - key: kernel.all.pswitch applications: -- cgit v1.2.3 From 608fea584b40346374f28858dc10f53f55e0adf6 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Wed, 30 Sep 2015 17:07:36 -0400 Subject: Adding % to units --- roles/os_zabbix/vars/template_os_linux.yml | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index fe72f003b..36c890da9 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -11,16 +11,19 @@ g_template_os_linux: applications: - Kernel value_type: float + units: '%' - key: kernel.all.cpu.irq.hard applications: - Kernel value_type: float + units: '%' - key: kernel.all.cpu.idle applications: - Kernel value_type: float + units: '%' - key: kernel.uname.distro applications: @@ -36,6 +39,7 @@ g_template_os_linux: applications: - Kernel value_type: float + units: '%' - key: kernel.all.load.15_minute applications: @@ -46,6 +50,7 @@ g_template_os_linux: applications: - Kernel value_type: float + units: '%' - key: kernel.all.load.5_minute applications: @@ -56,6 +61,7 @@ g_template_os_linux: applications: - Kernel value_type: float + units: '%' - key: kernel.all.load.1_minute applications: @@ -76,6 +82,7 @@ g_template_os_linux: applications: - Kernel value_type: float + units: '%' - key: kernel.uname.machine applications: @@ -91,6 +98,7 @@ g_template_os_linux: applications: - Kernel value_type: float + units: '%' - key: kernel.all.pswitch applications: -- cgit v1.2.3 From 2c37e6282bcd260119b654b23acdad3e6ca4340c Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Thu, 1 Oct 2015 12:14:23 -0400 Subject: Adding trggers for cpu idle --- roles/os_zabbix/vars/template_os_linux.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 36c890da9..c81f39c58 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -259,3 +259,18 @@ g_template_os_linux: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024' + + # CPU Utilization # + - name: 'CPU idle less than 5% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: high + description: 'CPU is less than 5% idle' + + - name: 'CPU idle less than 10% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: warn + description: 'CPU is less than 10% idle' + dependencies: + - 'CPU idle less than 5% on {HOST.NAME}' -- cgit v1.2.3 From e40b829103db88072c0e5fc759bff239cb214a43 Mon Sep 17 00:00:00 2001 From: Thomas Wiest Date: Wed, 30 Sep 2015 15:22:36 -0400 Subject: added Template Ops Tools --- roles/os_zabbix/vars/template_ops_tools.yml | 23 +++++++++++++++++++++++ roles/os_zabbix/vars/template_os_linux.yml | 2 -- 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 roles/os_zabbix/vars/template_ops_tools.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_ops_tools.yml b/roles/os_zabbix/vars/template_ops_tools.yml new file mode 100644 index 000000000..d1b8a2514 --- /dev/null +++ b/roles/os_zabbix/vars/template_ops_tools.yml @@ -0,0 +1,23 @@ +--- +g_template_ops_tools: + name: Template Operations Tools + zdiscoveryrules: + - name: disc.ops.runner + key: disc.ops.runner + lifetime: 1 + description: "Dynamically register operations runner items" + + zitemprototypes: + - discoveryrule_key: disc.ops.runner + name: "Exit code of ops-runner[{#OSO_COMMAND}]" + key: "disc.ops.runner.command.exitcode[{#OSO_COMMAND}]" + value_type: int + description: "The exit code of the command run from ops-runner" + applications: + - Ops Runner + + ztriggerprototypes: + - name: 'ops-runner[{#OSO_COMMAND}]: non-zero exit code on {HOST.NAME}' + expression: '{Template Operations Tools:disc.ops.runner.command.exitcode[{#OSO_COMMAND}].last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_ops_runner_command.asciidoc' + priority: average diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index c81f39c58..70c3809bd 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -204,12 +204,10 @@ g_template_os_linux: - name: disc.filesys key: disc.filesys lifetime: 1 - template_name: Template OS Linux description: "Dynamically register the filesystems" zitemprototypes: - discoveryrule_key: disc.filesys - template_name: Template OS Linux name: "disc.filesys.full.{#OSO_FILESYS}" key: "disc.filesys.full[{#OSO_FILESYS}]" value_type: float -- cgit v1.2.3 From b03679b3ce1a52cfcc1c356690165eafe9b7c313 Mon Sep 17 00:00:00 2001 From: Max Whittingham Date: Mon, 5 Oct 2015 11:41:25 -0500 Subject: Changed trigger to only info alert if users is at 0 --- roles/os_zabbix/vars/template_openshift_master.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index f82771d59..68f810fe0 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -36,7 +36,7 @@ g_template_openshift_master: priority: high - name: 'Number of users for Openshift Master on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.user.count.last(,3)}>0' + expression: '{Template Openshift Master:openshift.master.user.count.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info -- cgit v1.2.3 From aacf1839073ebd40ea0e379abeac5754b0656987 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Wed, 7 Oct 2015 16:56:46 -0400 Subject: Zabbix server template added --- .../os_zabbix/vars/template_app_zabbix_server.yml | 408 +++++++++++++++++++++ 1 file changed, 408 insertions(+) create mode 100644 roles/os_zabbix/vars/template_app_zabbix_server.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml new file mode 100644 index 000000000..dace2aa29 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -0,0 +1,408 @@ +--- +g_template_app_zabbix_server: + name: Template App Zabbix Server + zitems: + - key: housekeeper_creates + applications: + - Zabbix server + description: A simple count of the number of partition creates output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_drops + applications: + - Zabbix server + description: A simple count of the number of partition drops output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_errors + applications: + - Zabbix server + description: A simple count of the number of errors output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_total + applications: + - Zabbix server + description: A simple count of the total number of lines output by the housekeeper + script. + units: '' + value_type: int + zabbix_type: '2' + + - key: zabbix[process,alerter,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,configuration syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,db watchdog,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,discoverer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,escalator,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,history syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,housekeeper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,http poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,icmp pinger,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,ipmi poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,java poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,node watcher,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,proxy poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,self-monitoring,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,snmp trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,timer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,unreachable poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[queue,10m] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[queue] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[rcache,buffer,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,history,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,text,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,trend,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,values] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + ztriggers: + - description: "There has been unexpected output while running the housekeeping script\ + \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ + \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\ + \ for more details." + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}' + name: Unexpected output in Zabbix DB Housekeeping + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc + + - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details. + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0' + name: Errors during Zabbix DB Housekeeping + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75' + name: Zabbix alerter processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75' + name: Zabbix configuration syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75' + name: Zabbix db watchdog processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75' + name: Zabbix discoverer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75' + name: Zabbix escalator processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75' + name: Zabbix history syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75' + name: Zabbix housekeeper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75' + name: Zabbix http poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75' + name: Zabbix icmp pinger processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75' + name: Zabbix ipmi poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75' + name: Zabbix java poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75' + name: Zabbix node watcher processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75' + name: Zabbix poller processes more than 75% busy + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75' + name: Zabbix proxy poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75' + name: Zabbix self-monitoring processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75' + name: Zabbix snmp trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: Timer processes usually are busy because they have to process time + based trigger functions + expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75' + name: Zabbix timer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75' + name: Zabbix trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75' + name: Zabbix unreachable poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: "This alert generally indicates a performance problem or a problem\ + \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\ + \ is Administration > Queue. Be sure to check the general view and the per-proxy\ + \ view." + expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000' + name: More than 1000 items having missing data for more than 10 minutes + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc + + - description: Consider increasing CacheSize in the zabbix_server.conf configuration + file + expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5' + name: Less than 5% free in the configuration cache + priority: info + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25' + name: Less than 25% free in the history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25' + name: Less than 25% free in the text history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25' + name: Less than 25% free in the trends cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc -- cgit v1.2.3 From 2ff94367be1e8239e13fb8b2ab17eac6d931a283 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Thu, 8 Oct 2015 13:39:44 -0400 Subject: removed static xvda checks --- roles/os_zabbix/vars/template_os_linux.yml | 32 ------------------------------ 1 file changed, 32 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 70c3809bd..232139df9 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -188,18 +188,6 @@ g_template_os_linux: multiplier: 1024 units: B - # Disk items - - key: filesys.full.xvda2 - applications: - - Disk - value_type: float - - - key: filesys.full.xvda3 - applications: - - Disk - value_type: float - - zdiscoveryrules: - name: disc.filesys key: disc.filesys @@ -227,26 +215,6 @@ g_template_os_linux: priority: high ztriggers: - - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: warn - - - name: 'Filesystem: / has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: high - - - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: warn - - - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: high - - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' -- cgit v1.2.3 From 14e5118471d0d51acb5b12b304b25dad1edf0c87 Mon Sep 17 00:00:00 2001 From: Sten Turpin Date: Thu, 1 Oct 2015 08:48:04 -0500 Subject: added running pods --- roles/os_zabbix/vars/template_openshift_master.yml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 68f810fe0..234ea12ce 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -19,6 +19,12 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.pod.running.count + description: Shows number of pods running + type: int + applications: + - Openshift Master + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' -- cgit v1.2.3 From 3d45adca50cd903cfdba02e21a7a034dab209518 Mon Sep 17 00:00:00 2001 From: Sten Turpin Date: Fri, 2 Oct 2015 16:04:41 -0500 Subject: fixed spacing --- roles/os_zabbix/vars/template_openshift_master.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 234ea12ce..4ae918ec6 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -19,11 +19,11 @@ g_template_openshift_master: applications: - Openshift Master - - key: openshift.master.pod.running.count - description: Shows number of pods running - type: int - applications: - - Openshift Master + - key: openshift.master.pod.running.count + description: Shows number of pods running + type: int + applications: + - Openshift Master ztriggers: - name: 'Application creation has failed on {HOST.NAME}' -- cgit v1.2.3 From e5f0b4944a434a51ae9b460d60a0e00a626158e6 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Thu, 8 Oct 2015 15:18:05 -0400 Subject: Adding zabbix agent template --- roles/os_zabbix/vars/template_app_zabbix_agent.yml | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 roles/os_zabbix/vars/template_app_zabbix_agent.yml (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml new file mode 100644 index 000000000..06c4eda8b --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -0,0 +1,23 @@ +--- +g_template_app_zabbix_agent: + name: Template App Zabbix Agent + zitems: + - key: agent.hostname + applications: + - Zabbix agent + value_type: character + zabbix_type: '0' + + - key: agent.ping + applications: + - Zabbix agent + description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. + value_type: int + zabbix_type: '0' + + ztriggers: + - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' + description: Zabbix agent is unreachable for 15 minutes. + expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1' + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc -- cgit v1.2.3 From 3766356b8d4349b187af5ca6b945c3f1efd0fc25 Mon Sep 17 00:00:00 2001 From: Sten Turpin Date: Thu, 8 Oct 2015 14:20:37 -0500 Subject: added items + triggers for ovs tests --- roles/os_zabbix/vars/template_openshift_master.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 4ae918ec6..a49c7581a 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -25,6 +25,18 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.ovs.ports.count + description: Shows number of OVS ports defined + type: int + applications: + - Openshift Master + + - key: openshift.master.ovs.pids.count + description: Shows number of ovs process ids running + type: int + applications: + - Openshift Master + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -46,3 +58,13 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info + - name: 'OVS may not be running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.ovs.pids.count.last()}!=4' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Number of OVS ports is 0 on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.ovs.ports.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + -- cgit v1.2.3 From 1cc663daa7bc81d9fe54422711d43ad3a704f654 Mon Sep 17 00:00:00 2001 From: Sten Turpin Date: Thu, 8 Oct 2015 15:24:05 -0500 Subject: fixed not equals syntax --- roles/os_zabbix/vars/template_openshift_master.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index a49c7581a..cbbc79075 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -59,7 +59,7 @@ g_template_openshift_master: priority: info - name: 'OVS may not be running on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.ovs.pids.count.last()}!=4' + expression: '{Template Openshift Master:openshift.master.ovs.pids.count.last()}<>4' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high -- cgit v1.2.3 From 62af1cd5770d954a5811c1153ba2d5851a8d7cec Mon Sep 17 00:00:00 2001 From: Sten Turpin Date: Thu, 8 Oct 2015 15:38:24 -0500 Subject: moved vswitch from master to node --- roles/os_zabbix/vars/template_openshift_master.yml | 22 ----------------- roles/os_zabbix/vars/template_openshift_node.yml | 28 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 24 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index cbbc79075..4ae918ec6 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -25,18 +25,6 @@ g_template_openshift_master: applications: - Openshift Master - - key: openshift.master.ovs.ports.count - description: Shows number of OVS ports defined - type: int - applications: - - Openshift Master - - - key: openshift.master.ovs.pids.count - description: Shows number of ovs process ids running - type: int - applications: - - Openshift Master - ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -58,13 +46,3 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info - - name: 'OVS may not be running on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.ovs.pids.count.last()}<>4' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' - priority: high - - - name: 'Number of OVS ports is 0 on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.ovs.ports.count.last()}=0' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' - priority: high - diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml index 36f9cc4a3..ce28b1048 100644 --- a/roles/os_zabbix/vars/template_openshift_node.yml +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -8,13 +8,37 @@ g_template_openshift_node: applications: - Openshift Node + - key: openshift.node.ovs.pids.count + description: Shows number of ovs process ids running + type: int + applications: + - Openshift Node + + - key: openshift.node.ovs.ports.count + description: Shows number of OVS ports defined + type: int + applications: + - Openshift Node + ztriggers: - name: 'Openshift Node process not running on {HOST.NAME}' expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' priority: high - name: 'Too many Openshift Node processes running on {HOST.NAME}' expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'OVS may not be running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last()}<>4' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' priority: high + + - name: 'Number of OVS ports is 0 on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + -- cgit v1.2.3 From ef6435fefc08730be4ceacc3b3048a7c8602aabd Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Fri, 9 Oct 2015 11:56:39 -0400 Subject: Raising limits to 90 and 85 for disk usage --- roles/os_zabbix/vars/template_os_linux.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 232139df9..cd9649773 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -204,13 +204,13 @@ g_template_os_linux: - Disk ztriggerprototypes: - - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' + - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high -- cgit v1.2.3 From fee978537bbaea5aaee3bb7719ead8544f2d768a Mon Sep 17 00:00:00 2001 From: Gabor Burges Date: Sun, 11 Oct 2015 19:39:50 +0200 Subject: Update template_openshift_master.yml --- roles/os_zabbix/vars/template_openshift_master.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 4ae918ec6..1de4fefbb 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -24,6 +24,12 @@ g_template_openshift_master: type: int applications: - Openshift Master + + - key: openshift.project.counter + description: Shows number of projects on a cluster + type: int + applications: + - Openshift Master ztriggers: - name: 'Application creation has failed on {HOST.NAME}' @@ -46,3 +52,7 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info + - name: 'There are no projects running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.project.counter.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info -- cgit v1.2.3 From bdda05ca0d4f62ffbc4f8e3691081d38266ca38b Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Mon, 12 Oct 2015 16:01:45 -0400 Subject: added the dynamic items to track free inodes --- roles/os_zabbix/vars/template_os_linux.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index cd9649773..69432273f 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -203,17 +203,35 @@ g_template_os_linux: applications: - Disk + - discoveryrule_key: disc.filesys + name: "Percentage of used inodes on {#OSO_FILESYS}" + key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]" + value_type: float + description: "PCP derived value of percentage of used inodes on a filesystem." + applications: + - Disk + ztriggerprototypes: - - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free on {HOST.NAME}' + - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + + - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: high + ztriggers: - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' -- cgit v1.2.3 From ba80a2c5b19614c87d2ce0568ed8830c62d40f95 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Tue, 20 Oct 2015 17:00:14 -0400 Subject: Zabbix server stat fixes. enable the proper item types. --- roles/os_zabbix/vars/template_app_zabbix_agent.yml | 4 +- .../os_zabbix/vars/template_app_zabbix_server.yml | 62 +++++++++++----------- 2 files changed, 34 insertions(+), 32 deletions(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml index 06c4eda8b..6349b6384 100644 --- a/roles/os_zabbix/vars/template_app_zabbix_agent.yml +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -6,14 +6,14 @@ g_template_app_zabbix_agent: applications: - Zabbix agent value_type: character - zabbix_type: '0' + zabbix_type: 0 - key: agent.ping applications: - Zabbix agent description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. value_type: int - zabbix_type: '0' + zabbix_type: 0 ztriggers: - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml index dace2aa29..185ed7ecd 100644 --- a/roles/os_zabbix/vars/template_app_zabbix_server.yml +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -8,7 +8,7 @@ g_template_app_zabbix_server: description: A simple count of the number of partition creates output by the housekeeper script. units: '' value_type: int - zabbix_type: '2' + zabbix_type: 5 - key: housekeeper_drops applications: @@ -16,7 +16,7 @@ g_template_app_zabbix_server: description: A simple count of the number of partition drops output by the housekeeper script. units: '' value_type: int - zabbix_type: '2' + zabbix_type: 5 - key: housekeeper_errors applications: @@ -24,7 +24,7 @@ g_template_app_zabbix_server: description: A simple count of the number of errors output by the housekeeper script. units: '' value_type: int - zabbix_type: '2' + zabbix_type: 5 - key: housekeeper_total applications: @@ -33,7 +33,7 @@ g_template_app_zabbix_server: script. units: '' value_type: int - zabbix_type: '2' + zabbix_type: 5 - key: zabbix[process,alerter,avg,busy] applications: @@ -41,7 +41,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,configuration syncer,avg,busy] applications: @@ -49,7 +49,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,db watchdog,avg,busy] applications: @@ -57,7 +57,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,discoverer,avg,busy] applications: @@ -65,7 +65,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,escalator,avg,busy] applications: @@ -73,7 +73,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,history syncer,avg,busy] applications: @@ -81,7 +81,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,housekeeper,avg,busy] applications: @@ -89,7 +89,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,http poller,avg,busy] applications: @@ -97,7 +97,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,icmp pinger,avg,busy] applications: @@ -105,7 +105,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,ipmi poller,avg,busy] applications: @@ -113,7 +113,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,java poller,avg,busy] applications: @@ -121,7 +121,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,node watcher,avg,busy] applications: @@ -129,7 +129,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,poller,avg,busy] applications: @@ -137,7 +137,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,proxy poller,avg,busy] applications: @@ -145,7 +145,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,self-monitoring,avg,busy] applications: @@ -153,7 +153,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,snmp trapper,avg,busy] applications: @@ -161,7 +161,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,timer,avg,busy] applications: @@ -169,7 +169,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,trapper,avg,busy] applications: @@ -177,7 +177,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[process,unreachable poller,avg,busy] applications: @@ -185,7 +185,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[queue,10m] applications: @@ -193,7 +193,8 @@ g_template_app_zabbix_server: description: '' units: '' value_type: int - zabbix_type: '5' + zabbix_type: 5 + interval: 600 - key: zabbix[queue] applications: @@ -201,7 +202,8 @@ g_template_app_zabbix_server: description: '' units: '' value_type: int - zabbix_type: '5' + zabbix_type: 5 + interval: 600 - key: zabbix[rcache,buffer,pfree] applications: @@ -209,7 +211,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[wcache,history,pfree] applications: @@ -217,7 +219,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[wcache,text,pfree] applications: @@ -225,7 +227,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[wcache,trend,pfree] applications: @@ -233,7 +235,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: '5' + zabbix_type: 5 - key: zabbix[wcache,values] applications: @@ -241,7 +243,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: '5' + zabbix_type: 5 ztriggers: - description: "There has been unexpected output while running the housekeeping script\ \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ -- cgit v1.2.3 From c16cc8398b4313cdee12a1332a268c8979481ac7 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Tue, 20 Oct 2015 17:31:09 -0400 Subject: Adding delta feature to zbx_item --- roles/os_zabbix/vars/template_app_zabbix_server.yml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml index 185ed7ecd..aeec16254 100644 --- a/roles/os_zabbix/vars/template_app_zabbix_server.yml +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -244,6 +244,8 @@ g_template_app_zabbix_server: units: '' value_type: float zabbix_type: 5 + delta: 1 # speed per second + ztriggers: - description: "There has been unexpected output while running the housekeeping script\ \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ -- cgit v1.2.3 From 1192791e4689c0d80c74c25c2ae7765aa0a52a67 Mon Sep 17 00:00:00 2001 From: Joel Diaz Date: Thu, 22 Oct 2015 14:19:28 -0400 Subject: Lower priority to stop the paging action. --- roles/os_zabbix/vars/template_os_linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/os_zabbix/vars') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 69432273f..3ae1500bc 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -248,7 +248,7 @@ g_template_os_linux: - name: 'CPU idle less than 5% on {HOST.NAME}' expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' - priority: high + priority: average description: 'CPU is less than 5% idle' - name: 'CPU idle less than 10% on {HOST.NAME}' -- cgit v1.2.3