From 60b678aa338694d8eabeeab3a5f51e79a97ddaee Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Wed, 18 Nov 2015 16:49:36 -0500 Subject: added metric items to zabbix for openshift online --- roles/os_zabbix/vars/template_openshift_master.yml | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 174486e15..512adad4c 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,6 +13,12 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.api.ping + description: "Verify that the Openshift API is up" + type: int + applications: + - Openshift Master + - key: openshift.master.api.healthz description: "Checks the healthz check of the master's api: https://master_host/healthz" type: int @@ -44,6 +50,12 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.node.count + description: Shows the total number of nodes found in the Openshift Cluster + type: int + applications: + - Openshift Master + - key: openshift.project.count description: Shows number of projects on a cluster type: int @@ -122,6 +134,66 @@ g_template_openshift_master: applications: - Openshift Etcd + - key: openshift.master.metric.ping + description: "This check verifies that the https://master/metrics check is alive and communicating properly." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -133,6 +205,16 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high + - name: 'Openshift Master API PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: avg + - name: 'Openshift Master process not running on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -- cgit v1.2.3 From f2c27c2e8cf1dfd1d0e0f73f0a30a79435f33ecb Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Tue, 24 Nov 2015 16:23:19 -0500 Subject: added graphs --- roles/os_zabbix/tasks/main.yml | 40 ++++++++++++++++++++++ roles/os_zabbix/vars/template_openshift_master.yml | 34 ++++++++++++++++++ roles/os_zabbix/vars/template_os_linux.yml | 12 +++++++ 3 files changed, 86 insertions(+) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index 59c89bb02..d0b307a3d 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -8,15 +8,35 @@ register: templates - include_vars: template_heartbeat.yml + tags: + - heartbeat - include_vars: template_os_linux.yml + tags: + - linux - include_vars: template_docker.yml + tags: + - docker - include_vars: template_openshift_master.yml + tags: + - openshift_master - include_vars: template_openshift_node.yml + tags: + - openshift_node - include_vars: template_ops_tools.yml + tags: + - ops_tools - include_vars: template_app_zabbix_server.yml + tags: + - zabbix_server - include_vars: template_app_zabbix_agent.yml + tags: + - zabbix_agent - include_vars: template_performance_copilot.yml + tags: + - pcp - include_vars: template_aws.yml + tags: + - aws - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -25,6 +45,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - heartbeat - name: Include Template os_linux include: ../../lib_zabbix/tasks/create_template.yml @@ -33,6 +55,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - linux - name: Include Template docker include: ../../lib_zabbix/tasks/create_template.yml @@ -41,6 +65,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - docker - name: Include Template Openshift Master include: ../../lib_zabbix/tasks/create_template.yml @@ -49,6 +75,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - openshift_master - name: Include Template Openshift Node include: ../../lib_zabbix/tasks/create_template.yml @@ -57,6 +85,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - openshift_node - name: Include Template Ops Tools include: ../../lib_zabbix/tasks/create_template.yml @@ -65,6 +95,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - ops_tools - name: Include Template App Zabbix Server include: ../../lib_zabbix/tasks/create_template.yml @@ -73,6 +105,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - zabbix_server - name: Include Template App Zabbix Agent include: ../../lib_zabbix/tasks/create_template.yml @@ -81,6 +115,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - zabbix_agent - name: Include Template Performance Copilot include: ../../lib_zabbix/tasks/create_template.yml @@ -89,6 +125,8 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - pcp - name: Include Template AWS include: ../../lib_zabbix/tasks/create_template.yml @@ -97,3 +135,5 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + tags: + - aws diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 512adad4c..8236cf135 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -244,3 +244,37 @@ g_template_openshift_master: expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' priority: high + + zgraphs: + - name: Openshift Master API Server Latency Pods LIST Quantiles + width: 900 + height: 200 + graph_items: + - item_name: openshift.master.apiserver.latency.summary.pods.quantile.list.5 + color: red + - item_name: openshift.master.apiserver.latency.summary.pods.quantile.list.9 + color: blue + - item_name: openshift.master.apiserver.latency.summary.pods.quantile.list.99 + color: orange + + - name: Openshift Master API Server Latency Pods WATCHLIST Quantiles + width: 900 + height: 200 + graph_items: + - item_name: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5 + color: red + - item_name: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9 + color: blue + - item_name: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99 + color: orange + + - name: Openshift Master Scheduler End to End Latency Quantiles + width: 900 + height: 200 + graph_items: + - item_name: openshift.master.scheduler.e2e.scheduling.latency.quantile.5 + color: red + - item_name: openshift.master.scheduler.e2e.scheduling.latency.quantile.9 + color: blue + - item_name: openshift.master.scheduler.e2e.scheduling.latency.quantile.99 + color: orange diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 04665be62..79d52ef9b 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -304,3 +304,15 @@ g_template_os_linux: description: 'CPU is less than 10% idle' dependencies: - 'CPU idle less than 5% on {HOST.NAME}' + + zgraphprototypes: + - name: Network Interface Usage + width: 1000 + height: 400 + graph_items: + - item_name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}" + item_type: prototype + color: red + - item_name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}" + item_type: prototype + color: blue -- cgit v1.2.3 From 93befca22e7c72d2e8d9937cc695e6cace6f1b4a Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Tue, 1 Dec 2015 10:04:23 -0500 Subject: added docker registry cluster check --- roles/os_zabbix/vars/template_openshift_master.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 8236cf135..6972ac877 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -7,6 +7,12 @@ g_template_openshift_master: - Openshift Master key: create_app + - key: openshift.master.registry.healthz + description: "Shows the health status of the cluster's docker registry" + type: int + applications: + - Openshift Master + - key: openshift.master.process.count description: Shows number of master processes running type: int @@ -245,6 +251,11 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' priority: high + - name: 'Docker Registry check failed on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + zgraphs: - name: Openshift Master API Server Latency Pods LIST Quantiles width: 900 -- cgit v1.2.3 From 7e51e686092f553f0c164e4246ece3175e6af01b Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Thu, 3 Dec 2015 16:43:30 -0500 Subject: added the pv zabbix keys --- roles/os_zabbix/vars/template_openshift_master.yml | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 6972ac877..522e6bbe1 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -68,6 +68,36 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.pv.total.count + description: Total number of Persistent Volumes in the Openshift Cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pv.available.count + description: Total number of Available Persistent Volumes in the Openshift Cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pv.released.count + description: Total number of Released Persistent Volumes in the Openshift Cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pv.bound.count + description: Total number of Bound Persistent Volumes in the Openshift Cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pv.failed.count + description: Total number of Failed Persistent Volumes in the Openshift Cluster + type: int + applications: + - Openshift Master + - key: openshift.master.etcd.create.success description: Show number of successful create actions type: int -- cgit v1.2.3 From 8c1a1a8ad0fd2d675ace5872c5237eaec9ca668f Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 4 Dec 2015 09:42:06 -0500 Subject: Zabbix: added trigger dependencies to certain master checks --- roles/os_zabbix/vars/template_openshift_master.yml | 63 +++++++++++++--------- 1 file changed, 37 insertions(+), 26 deletions(-) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 522e6bbe1..514d6fd24 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -231,26 +231,6 @@ g_template_openshift_master: - Openshift Master Metrics ztriggers: - - name: 'Application creation has failed on {HOST.NAME}' - expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' - priority: avg - - - name: 'Openshift Master API health check is failing on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' - priority: high - - - name: 'Openshift Master API PING check is failing on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' - priority: high - - - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' - priority: avg - - name: 'Openshift Master process not running on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -261,6 +241,16 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high + - name: 'Low number of etcd watchers on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: avg + + - name: 'Etcd ping failed on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: high + - name: 'Number of users for Openshift Master on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.user.count.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -271,19 +261,40 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info - - name: 'Low number of etcd watchers on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + # Put triggers that depend on other triggers here (deps must be created first) + - name: 'Application creation has failed on {HOST.NAME}' + expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' priority: avg - - name: 'Etcd ping failed on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + - name: 'Openshift Master API health check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' priority: high + - name: 'Openshift Master API PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' + priority: high + + - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' + priority: avg + - name: 'Docker Registry check failed on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' priority: high zgraphs: -- cgit v1.2.3 From e763e8f09db16f5508387faf869941f626b76d41 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 4 Dec 2015 09:45:27 -0500 Subject: zabbix: removed ethernet graphs --- roles/os_zabbix/vars/template_os_linux.yml | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 79d52ef9b..04665be62 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -304,15 +304,3 @@ g_template_os_linux: description: 'CPU is less than 10% idle' dependencies: - 'CPU idle less than 5% on {HOST.NAME}' - - zgraphprototypes: - - name: Network Interface Usage - width: 1000 - height: 400 - graph_items: - - item_name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}" - item_type: prototype - color: red - - item_name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}" - item_type: prototype - color: blue -- cgit v1.2.3 From 72917fe73008df533a46948759d4b4f66b65c2fc Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 4 Dec 2015 10:47:12 -0500 Subject: Zabbix: added dependency for disk check --- roles/os_zabbix/vars/template_os_linux.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 04665be62..023f1670b 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -258,16 +258,19 @@ g_template_os_linux: - Network ztriggerprototypes: - - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' - expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: warn - - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high + # This has a dependency on the previous trigger + - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + dependencies: + - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' -- cgit v1.2.3 From 92ce3c5fb25745beea179ace7814b7d4e9019c1b Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 4 Dec 2015 10:51:30 -0500 Subject: Zabbix: added dependency for inode disk check --- roles/os_zabbix/vars/template_os_linux.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 023f1670b..778e4341d 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -271,16 +271,18 @@ g_template_os_linux: dependencies: - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' - - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' - expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: warn - - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + dependencies: + - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' + ztriggers: - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' -- cgit v1.2.3 From 31d8f427ce356e93855157db65756091d7b09861 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Fri, 4 Dec 2015 11:07:46 -0500 Subject: Zabbix: put in a note about trigger prototype dependency --- roles/os_zabbix/vars/template_os_linux.yml | 3 +++ 1 file changed, 3 insertions(+) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 778e4341d..c6e557f12 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -264,6 +264,7 @@ g_template_os_linux: priority: high # This has a dependency on the previous trigger + # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0 - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' @@ -276,6 +277,8 @@ g_template_os_linux: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high + # This has a dependency on the previous trigger + # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0 - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' -- cgit v1.2.3