diff options
Diffstat (limited to 'roles/os_zabbix/vars')
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 82 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_os_linux.yml | 6 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_performance_copilot.yml | 14 |
3 files changed, 99 insertions, 3 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 1de4fefbb..6defc4989 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -31,6 +31,78 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.etcd.create.success + description: Show number of successful create actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.create.fail + description: Show number of failed create actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.delete.success + description: Show number of successful delete actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.delete.fail + description: Show number of failed delete actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.get.success + description: Show number of successful get actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.get.fail + description: Show number of failed get actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.set.success + description: Show number of successful set actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.set.fail + description: Show number of failed set actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.update.success + description: Show number of successful update actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.update.fail + description: Show number of failed update actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.watchers + description: Show number of etcd watchers + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.ping + description: etcd ping + type: int + applications: + - Openshift Etcd + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -56,3 +128,13 @@ g_template_openshift_master: expression: '{Template Openshift Master:openshift.project.counter.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info + + - name: 'Low number of etcd watchers on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: avg + + - name: 'Etcd ping failed on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: high diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 3ae1500bc..aeeec4b8d 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -246,15 +246,15 @@ g_template_os_linux: # CPU Utilization # - name: 'CPU idle less than 5% on {HOST.NAME}' - expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' + expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' priority: average description: 'CPU is less than 5% idle' - name: 'CPU idle less than 10% on {HOST.NAME}' - expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10' + expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' - priority: warn + priority: average description: 'CPU is less than 10% idle' dependencies: - 'CPU idle less than 5% on {HOST.NAME}' diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml new file mode 100644 index 000000000..b62fa0228 --- /dev/null +++ b/roles/os_zabbix/vars/template_performance_copilot.yml @@ -0,0 +1,14 @@ +--- +g_template_performance_copilot: + name: Template Performance Copilot + zitems: + - key: pcp.ping + applications: + - Performance Copilot + value_type: int + + ztriggers: + - name: 'pcp.ping failed on {HOST.NAME}' + expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc' + priority: average |