summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix/vars
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix/vars')
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml93
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml34
2 files changed, 82 insertions, 45 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 6972ac877..514d6fd24 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -68,6 +68,36 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.pv.total.count
+ description: Total number of Persistent Volumes in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.pv.available.count
+ description: Total number of Available Persistent Volumes in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.pv.released.count
+ description: Total number of Released Persistent Volumes in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.pv.bound.count
+ description: Total number of Bound Persistent Volumes in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.pv.failed.count
+ description: Total number of Failed Persistent Volumes in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
- key: openshift.master.etcd.create.success
description: Show number of successful create actions
type: int
@@ -201,26 +231,6 @@ g_template_openshift_master:
- Openshift Master Metrics
ztriggers:
- - name: 'Application creation has failed on {HOST.NAME}'
- expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
- priority: avg
-
- - name: 'Openshift Master API health check is failing on {HOST.NAME}'
- expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
- priority: high
-
- - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
- expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
- priority: high
-
- - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
- expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
- priority: avg
-
- name: 'Openshift Master process not running on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
@@ -231,6 +241,16 @@ g_template_openshift_master:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: high
+ - name: 'Low number of etcd watchers on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: avg
+
+ - name: 'Etcd ping failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: high
+
- name: 'Number of users for Openshift Master on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
@@ -241,19 +261,40 @@ g_template_openshift_master:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: info
- - name: 'Low number of etcd watchers on {HOST.NAME}'
- expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ # Put triggers that depend on other triggers here (deps must be created first)
+ - name: 'Application creation has failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
priority: avg
- - name: 'Etcd ping failed on {HOST.NAME}'
- expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ - name: 'Openshift Master API health check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ priority: high
+
+ - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
priority: high
+ - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ priority: avg
+
- name: 'Docker Registry check failed on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
priority: high
zgraphs:
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 79d52ef9b..c6e557f12 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -258,26 +258,34 @@ g_template_os_linux:
- Network
ztriggerprototypes:
- - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: warn
-
- name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
- - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
+ # This has a dependency on the previous trigger
+ # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: warn
+ dependencies:
+ - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
- name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
+ # This has a dependency on the previous trigger
+ # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+ priority: warn
+ dependencies:
+ - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
+
ztriggers:
- name: 'Too many TOTAL processes on {HOST.NAME}'
expression: '{Template OS Linux:proc.nprocs.last()}>5000'
@@ -304,15 +312,3 @@ g_template_os_linux:
description: 'CPU is less than 10% idle'
dependencies:
- 'CPU idle less than 5% on {HOST.NAME}'
-
- zgraphprototypes:
- - name: Network Interface Usage
- width: 1000
- height: 400
- graph_items:
- - item_name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}"
- item_type: prototype
- color: red
- - item_name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}"
- item_type: prototype
- color: blue