diff options
Diffstat (limited to 'roles/os_zabbix')
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 25 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_node.yml | 22 |
2 files changed, 22 insertions, 25 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 1824d7881..e36f23a2b 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -7,12 +7,6 @@ g_template_openshift_master: - Openshift Master key: openshift.master.app.create - - key: openshift.master.registry.healthy_pct - description: "Shows the percentage of healthy registries in the cluster" - type: int - applications: - - Openshift Master - - key: openshift.master.process.count description: Shows number of master processes running type: int @@ -278,11 +272,6 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high - - name: 'Low number of etcd watchers on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' - priority: avg - - name: 'Etcd ping failed on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' @@ -345,20 +334,6 @@ g_template_openshift_master: - 'Openshift Master process not running on {HOST.NAME}' priority: avg - - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.registry.healthy_pct.last(#2)}<100 and {Template Openshift Master:openshift.master.registry.healthy_pct.max(#2)}>50' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc' - dependencies: - - 'Openshift Master process not running on {HOST.NAME}' - priority: avg - - - name: 'Multiple Docker Registries are unhealthy according to {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.registry.healthy_pct.last(#2)}<51' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc' - dependencies: - - 'Openshift Master process not running on {HOST.NAME}' - priority: high - - name: 'SkyDNS port not listening on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.skydns.port.open.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml index b0488656d..c36c593df 100644 --- a/roles/os_zabbix/vars/template_openshift_node.yml +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -26,7 +26,29 @@ g_template_openshift_node: applications: - Openshift Node + - key: openshift.node.registry-pods.healthy_pct + description: Shows the percentage of healthy registries in the cluster + type: int + applications: + - Openshift Node + + - key: openshift.node.registry.service.ping + description: Ping docker-registry service from node + type: int + applications: + - Openshift Node + ztriggers: + - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#2)}<100 and {Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#1)}<100' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc' + priority: avg + + - name: 'Docker Registry service is unhealthy according to {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.registry.service.ping.last(#2)}<1 and {Template Openshift Node:openshift.node.registry.service.ping.last(#1)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc' + priority: avg + - name: 'Openshift Node process not running on {HOST.NAME}' expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' |