summary | refs | log | tree | commit | diff | stats
path: root/roles/os_zabbix/vars
diff options
context:
space:
mode:
author Kenny Woodson <kwoodson@redhat.com> 2015-11-04 11:59:28 -0500
committer Kenny Woodson <kwoodson@redhat.com> 2015-11-04 11:59:28 -0500
commit 7869fb8c26a96c1e0ee74b930fd0da8a9952cb52 (patch)
tree 37b83c517769c4730b680b602672ba3f9245137a /roles/os_zabbix/vars
parent a706860af853be6d8bc77ea38593467e827ed527 (diff)
parent f5e83fc2330d6f784df3859f5305d4b80cc0c469 (diff)
download openshift-7869fb8c26a96c1e0ee74b930fd0da8a9952cb52.tar.gz
openshift-7869fb8c26a96c1e0ee74b930fd0da8a9952cb52.tar.bz2
openshift-7869fb8c26a96c1e0ee74b930fd0da8a9952cb52.tar.xz
openshift-7869fb8c26a96c1e0ee74b930fd0da8a9952cb52.zip
Merge pull request #811 from openshift/master
master to prod
Diffstat (limited to 'roles/os_zabbix/vars')
-rw-r--r-- roles/os_zabbix/vars/template_openshift_master.yml 82
-rw-r--r-- roles/os_zabbix/vars/template_os_linux.yml 21
-rw-r--r-- roles/os_zabbix/vars/template_performance_copilot.yml 14
3 files changed, 117 insertions, 0 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 1de4fefbb..6defc4989 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -31,6 +31,78 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.etcd.create.success
+ description: Show number of successful create actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.create.fail
+ description: Show number of failed create actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.delete.success
+ description: Show number of successful delete actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.delete.fail
+ description: Show number of failed delete actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.get.success
+ description: Show number of successful get actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.get.fail
+ description: Show number of failed get actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.set.success
+ description: Show number of successful set actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.set.fail
+ description: Show number of failed set actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.update.success
+ description: Show number of successful update actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.update.fail
+ description: Show number of failed update actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.watchers
+ description: Show number of etcd watchers
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.ping
+ description: etcd ping
+ type: int
+ applications:
+ - Openshift Etcd
+
ztriggers:
- name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -56,3 +128,13 @@ g_template_openshift_master:
expression: '{Template Openshift Master:openshift.project.counter.last()}=0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: info
+
+ - name: 'Low number of etcd watchers on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: avg
+
+ - name: 'Etcd ping failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: high
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index aeeec4b8d..fbc20cd63 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -194,6 +194,11 @@ g_template_os_linux:
lifetime: 1
description: "Dynamically register the filesystems"
+ - name: disc.disk
+ key: disc.disk
+ lifetime: 1
+ description: "Dynamically register disks on a node"
+
zitemprototypes:
- discoveryrule_key: disc.filesys
name: "disc.filesys.full.{#OSO_FILESYS}"
@@ -211,6 +216,22 @@ g_template_os_linux:
applications:
- Disk
+ - discoveryrule_key: disc.disk
+ name: "TPS (IOPS) for disk {#OSO_DISK}"
+ key: "disc.disk.tps[{#OSO_DISK}]"
+ value_type: int
+ description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using"
+ applications:
+ - Disk
+
+ - discoveryrule_key: disc.disk
+ name: "Percent Utilized for disk {#OSO_DISK}"
+ key: "disc.disk.putil[{#OSO_DISK}]"
+ value_type: float
+ description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command"
+ applications:
+ - Disk
+
ztriggerprototypes:
- name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml
new file mode 100644
index 000000000..b62fa0228
--- /dev/null
+++ b/roles/os_zabbix/vars/template_performance_copilot.yml
@@ -0,0 +1,14 @@
+---
+g_template_performance_copilot:
+ name: Template Performance Copilot
+ zitems:
+ - key: pcp.ping
+ applications:
+ - Performance Copilot
+ value_type: int
+
+ ztriggers:
+ - name: 'pcp.ping failed on {HOST.NAME}'
+ expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc'
+ priority: average