From 44ea44a3738f961222d5656e965923ef847b1bec Mon Sep 17 00:00:00 2001
From: Joel Diaz
Date: Mon, 2 Nov 2015 11:06:39 -0500
Subject: get zabbix ready to start tracking status of pcp

---
 roles/os_zabbix/vars/template_performance_copilot.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 roles/os_zabbix/vars/template_performance_copilot.yml

(limited to 'roles/os_zabbix/vars')

diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml
new file mode 100644
index 000000000..b62fa0228
--- /dev/null
+++ b/roles/os_zabbix/vars/template_performance_copilot.yml
@@ -0,0 +1,14 @@
+---
+g_template_performance_copilot:
+  name: Template Performance Copilot
+  zitems:
+  - key: pcp.ping
+    applications:
+    - Performance Copilot
+    value_type: int
+
+  ztriggers:
+  - name: 'pcp.ping failed on {HOST.NAME}'
+    expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc'
+    priority: average
-- 
cgit v1.2.3

From e35c5778604eb46be4079b47f786318c97a3f8b8 Mon Sep 17 00:00:00 2001
From: Marek Mahut
Date: Wed, 14 Oct 2015 14:44:58 +0200
Subject: Adding openshift.node.etcd items

---
 roles/os_zabbix/vars/template_openshift_master.yml | 82 ++++++++++++++++++++++
 1 file changed, 82 insertions(+)

(limited to 'roles/os_zabbix/vars')

diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 1de4fefbb..cd702a814 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -31,6 +31,78 @@ g_template_openshift_master:
     applications:
     - Openshift Master
 
+  - key: openshift.master.etcd.create.success
+    description: Show number of successful create actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.create.fail
+    description: Show number of failed create actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.delete.success
+    description: Show number of successful delete actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.delete.fail
+    description: Show number of failed delete actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.get.success
+    description: Show number of successful get actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.get.fail
+    description: Show number of failed get actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.set.success
+    description: Show number of successful set actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.set.fail
+    description: Show number of failed set actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.update.success
+    description: Show number of successful update actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.update.fail
+    description: Show number of failed update actions
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.watchers
+    description: Show number of etcd watchers
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.ping
+    description: etcd ping
+    type: int
+    applications:
+    - Openshift Master
+
   ztriggers:
   - name: 'Application creation has failed on {HOST.NAME}'
     expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -56,3 +128,13 @@ g_template_openshift_master:
     expression: '{Template Openshift Master:openshift.project.counter.last()}=0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
     priority: info
+
+  - name: 'Low number of etcd watchers on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+    priority: avg
+
+  - name: 'Etcd ping failed on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+    priority: high
-- 
cgit v1.2.3

From 0f58ddd79113f00e9fab62b8938ba02dbe0bef12 Mon Sep 17 00:00:00 2001
From: Marek Mahut
Date: Tue, 3 Nov 2015 17:09:27 +0100
Subject: Moving to Openshift Etcd application

---
 roles/os_zabbix/vars/template_openshift_master.yml | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'roles/os_zabbix/vars')

diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index cd702a814..6defc4989 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -35,73 +35,73 @@ g_template_openshift_master:
     description: Show number of successful create actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.create.fail
     description: Show number of failed create actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.delete.success
     description: Show number of successful delete actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.delete.fail
     description: Show number of failed delete actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.get.success
     description: Show number of successful get actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.get.fail
     description: Show number of failed get actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.set.success
     description: Show number of successful set actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.set.fail
     description: Show number of failed set actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.update.success
     description: Show number of successful update actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.update.fail
     description: Show number of failed update actions
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.watchers
     description: Show number of etcd watchers
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   - key: openshift.master.etcd.ping
     description: etcd ping
     type: int
     applications:
-    - Openshift Master
+    - Openshift Etcd
 
   ztriggers:
   - name: 'Application creation has failed on {HOST.NAME}'
-- 
cgit v1.2.3

From 97dda64d5458dc7bd3edad720eb2a1e821b8b947 Mon Sep 17 00:00:00 2001
From: Matt Woodson
Date: Tue, 3 Nov 2015 14:02:40 -0500
Subject: added disk tps checks to zabbix

---
 roles/os_zabbix/vars/template_os_linux.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'roles/os_zabbix/vars')

diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index aeeec4b8d..68de2dde5 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -194,6 +194,11 @@ g_template_os_linux:
     lifetime: 1
     description: "Dynamically register the filesystems"
 
+  - name: disc.disk
+    key: disc.disk
+    lifetime: 1
+    description: "Dynamically register disks on a node"
+
   zitemprototypes:
   - discoveryrule_key: disc.filesys
     name: "disc.filesys.full.{#OSO_FILESYS}"
@@ -211,6 +216,14 @@ g_template_os_linux:
     applications:
     - Disk
 
+  - discoveryrule_key: disc.disk
+    name: "TPS (IOPS) for disk {#OSO_DISK}"
+    key: "disc.disk.tps[{#OSO_FILESYS}]"
+    value_type: int
+    description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using"
+    applications:
+    - Disk
+
   ztriggerprototypes:
   - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
     expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
-- 
cgit v1.2.3

From 4e04a3719e867ca0bd6b54440f4066fdd6f6751a Mon Sep 17 00:00:00 2001
From: Matt Woodson
Date: Tue, 3 Nov 2015 15:31:36 -0500
Subject: fixed a dumb naming mistake

---
 roles/os_zabbix/vars/template_os_linux.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'roles/os_zabbix/vars')

diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 68de2dde5..d494f1bad 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -218,7 +218,7 @@ g_template_os_linux:
 
   - discoveryrule_key: disc.disk
     name: "TPS (IOPS) for disk {#OSO_DISK}"
-    key: "disc.disk.tps[{#OSO_FILESYS}]"
+    key: "disc.disk.tps[{#OSO_DISK}]"
     value_type: int
     description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using"
     applications:
-- 
cgit v1.2.3

From f1db40374cc98205118ff1f6320b8bffe9bf3dc1 Mon Sep 17 00:00:00 2001
From: Matt Woodson
Date: Wed, 4 Nov 2015 10:16:33 -0500
Subject: added the %util in zabbix

---
 roles/os_zabbix/vars/template_os_linux.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'roles/os_zabbix/vars')

diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index d494f1bad..fbc20cd63 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -224,6 +224,14 @@ g_template_os_linux:
     applications:
     - Disk
 
+  - discoveryrule_key: disc.disk
+    name: "Percent Utilized for disk {#OSO_DISK}"
+    key: "disc.disk.putil[{#OSO_DISK}]"
+    value_type: float
+    description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command"
+    applications:
+    - Disk
+
   ztriggerprototypes:
   - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
     expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
-- 
cgit v1.2.3