summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix/vars/template_openshift_node.yml
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix/vars/template_openshift_node.yml')
-rw-r--r--roles/os_zabbix/vars/template_openshift_node.yml54
1 files changed, 52 insertions, 2 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml
index 36f9cc4a3..66bd3a147 100644
--- a/roles/os_zabbix/vars/template_openshift_node.yml
+++ b/roles/os_zabbix/vars/template_openshift_node.yml
@@ -8,13 +8,63 @@ g_template_openshift_node:
applications:
- Openshift Node
+ - key: openshift.node.ovs.pids.count
+ description: Shows number of ovs process ids running
+ type: int
+ applications:
+ - Openshift Node
+
+ - key: openshift.node.ovs.ports.count
+ description: Shows number of OVS ports defined
+ type: int
+ applications:
+ - Openshift Node
+
+ - key: openshift.node.ovs.stray.rules
+ description: Number of OVS stray rules found/removed
+ type: int
+ applications:
+ - Openshift Node
+
+ - key: openshift.node.registry-pods.healthy_pct
+ description: Shows the percentage of healthy registries in the cluster
+ type: int
+ applications:
+ - Openshift Node
+
+ - key: openshift.node.registry.service.ping
+ description: Ping docker-registry service from node
+ type: int
+ applications:
+ - Openshift Node
+
ztriggers:
+ - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}'
+ expression: '{Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#2)}<100 and {Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#1)}<100'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
+ priority: avg
+
+ - name: 'Docker Registry service is unhealthy according to {HOST.NAME}'
+ expression: '{Template Openshift Node:openshift.node.registry.service.ping.last(#2)}<1 and {Template Openshift Node:openshift.node.registry.service.ping.last(#1)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
+ priority: avg
+
- name: 'Openshift Node process not running on {HOST.NAME}'
expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
priority: high
- name: 'Too many Openshift Node processes running on {HOST.NAME}'
expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
+ priority: high
+
+ - name: '[Heal] OVS may not be running on {HOST.NAME}'
+ expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last(#1)}<>4 and {Template Openshift Node:openshift.node.ovs.pids.count.last(#2)}<>4'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
+ priority: high
+
+ - name: 'Number of OVS ports is 0 on {HOST.NAME}'
+ expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
priority: high