summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix')
-rw-r--r--roles/os_zabbix/tasks/main.yml18
-rw-r--r--roles/os_zabbix/vars/template_config_loop.yml14
-rw-r--r--roles/os_zabbix/vars/template_docker.yml2
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml26
4 files changed, 56 insertions, 4 deletions
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index a8b65dd56..1c8d88854 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -1,8 +1,4 @@
---
-- fail:
- msg: "Zabbix config is not yet supported on atomic hosts"
- when: openshift.common.is_containerized | bool
-
- name: Main List all templates
zbx_template:
zbx_server: "{{ ozb_server }}"
@@ -45,6 +41,10 @@
tags:
- zagg_server
+- include_vars: template_config_loop.yml
+ tags:
+ - config_loop
+
- name: Include Template Heartbeat
include: ../../lib_zabbix/tasks/create_template.yml
vars:
@@ -154,3 +154,13 @@
password: "{{ ozb_password }}"
tags:
- zagg_server
+
+- name: Include Template Config Loop
+ include: ../../lib_zabbix/tasks/create_template.yml
+ vars:
+ template: "{{ g_template_config_loop }}"
+ server: "{{ ozb_server }}"
+ user: "{{ ozb_user }}"
+ password: "{{ ozb_password }}"
+ tags:
+ - config_loop
diff --git a/roles/os_zabbix/vars/template_config_loop.yml b/roles/os_zabbix/vars/template_config_loop.yml
new file mode 100644
index 000000000..823da1868
--- /dev/null
+++ b/roles/os_zabbix/vars/template_config_loop.yml
@@ -0,0 +1,14 @@
+---
+g_template_config_loop:
+ name: Template Config Loop
+ zitems:
+ - key: config_loop.run.exit_code
+ applications:
+ - Config Loop
+ value_type: int
+
+ ztriggers:
+ - name: 'config_loop.run.exit_code not zero on {HOST.NAME}'
+ expression: '{Template Config Loop:config_loop.run.exit_code.min(#2)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_config_loop.asciidoc'
+ priority: average
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index a05e552e3..dd13e76f7 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -72,10 +72,12 @@ g_template_docker:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
priority: high
+ # Re-enable for OpenShift 3.1.1 (https://bugzilla.redhat.com/show_bug.cgi?id=1292971#c6)
- name: 'docker.container.dns.resolution failed on {HOST.NAME}'
expression: '{Template Docker:docker.container.dns.resolution.min(#3)}>0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
priority: average
+ status: disabled
- name: 'docker.container.existing.dns.resolution.failed on {HOST.NAME}'
expression: '{Template Docker:docker.container.existing.dns.resolution.failed.min(#3)}>0'
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index a0ba8d104..12ea36c8b 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -98,6 +98,18 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.skydns.port.open
+ description: State of the SkyDNS port open and listening
+ type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.skydns.query
+ description: SkyDNS can be queried or not
+ type: int
+ applications:
+ - Openshift Master
+
- key: openshift.master.etcd.create.success
description: Show number of successful create actions
type: int
@@ -305,6 +317,20 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: high
+ - name: 'SkyDNS port not listening on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.skydns.port.open.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ priority: high
+
+ - name: 'SkyDNS query failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.skydns.query.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ dependencies:
+ - 'Openshift Master API health check is failing on {HOST.NAME}'
+ priority: high
+
zgraphs:
- name: Openshift Master API Server Latency Pods LIST Quantiles
width: 900