summary | refs | log | tree | commit | diff | stats
path: root/roles/os_zabbix/vars
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix/vars')
-rw-r--r-- roles/os_zabbix/vars/template_openshift_master.yml 8
-rw-r--r-- roles/os_zabbix/vars/template_zagg_server.yml 36
2 files changed, 44 insertions, 0 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: avg
+ - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+ expression: '{Template Openshift Master:create_app.sum(1h)}>3'  # sum of create_app over the last hour is above 3
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:  # refers to another trigger by name; that trigger's definition is outside this hunk
+ - 'Openshift Master process not running on {HOST.NAME}'
+ description: The application create loop has failed 4 or more times in the last hour
+ priority: avg
+
- name: 'Openshift Master API health check is failing on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
new file mode 100644
index 000000000..0e8e53bb7
--- /dev/null
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -0,0 +1,36 @@
+---
+g_template_zagg_server:  # Zabbix template definition for the Zagg server
+  name: Template Zagg Server
+  zitems:  # three integer-valued items, all listed under the 'Zagg Server' application
+    - key: zagg.server.metrics.count
+      applications:
+        - Zagg Server
+      value_type: int
+
+    - key: zagg.server.processor.errors
+      applications:
+        - Zagg Server
+      value_type: int
+
+    - key: zagg.server.heartbeat.count  # collected only; no trigger in this file references it
+      applications:
+        - Zagg Server
+      value_type: int
+
+  ztriggers:  # min(#3) is the smallest of the last three values, so all three must exceed the limit
+    - name: 'Error sending metrics on {HOST.NAME}'
+      expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+      priority: avg  # same spelling as the triggers in template_openshift_master.yml
+
+    - name: 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+      expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>10000'
+      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+      priority: high
+
+    - name: 'High number of metrics in Zagg queue {HOST.NAME}'
+      expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>5000'
+      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+      dependencies:  # names the critical trigger above; string is identical to its `name`
+        - 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+      priority: avg  # same spelling as the triggers in template_openshift_master.yml