summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix')
-rw-r--r--roles/os_zabbix/tasks/main.yml9
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml82
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml21
-rw-r--r--roles/os_zabbix/vars/template_performance_copilot.yml14
4 files changed, 126 insertions, 0 deletions
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index a503b24d7..82bf78b57 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -15,6 +15,7 @@
- include_vars: template_ops_tools.yml
- include_vars: template_app_zabbix_server.yml
- include_vars: template_app_zabbix_agent.yml
+- include_vars: template_performance_copilot.yml
- name: Include Template Heartbeat
include: ../../lib_zabbix/tasks/create_template.yml
@@ -79,3 +80,11 @@
server: "{{ ozb_server }}"
user: "{{ ozb_user }}"
password: "{{ ozb_password }}"
+
+- name: Include Template Performance Copilot
+ include: ../../lib_zabbix/tasks/create_template.yml
+ vars:
+ template: "{{ g_template_performance_copilot }}"
+ server: "{{ ozb_server }}"
+ user: "{{ ozb_user }}"
+ password: "{{ ozb_password }}"
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 1de4fefbb..6defc4989 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -31,6 +31,78 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.etcd.create.success
+ description: Show number of successful create actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.create.fail
+ description: Show number of failed create actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.delete.success
+ description: Show number of successful delete actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.delete.fail
+ description: Show number of failed delete actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.get.success
+ description: Show number of successful get actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.get.fail
+ description: Show number of failed get actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.set.success
+ description: Show number of successful set actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.set.fail
+ description: Show number of failed set actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.update.success
+ description: Show number of successful update actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.update.fail
+ description: Show number of failed update actions
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.watchers
+ description: Show number of etcd watchers
+ type: int
+ applications:
+ - Openshift Etcd
+
+ - key: openshift.master.etcd.ping
+ description: etcd ping
+ type: int
+ applications:
+ - Openshift Etcd
+
ztriggers:
- name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -56,3 +128,13 @@ g_template_openshift_master:
expression: '{Template Openshift Master:openshift.project.counter.last()}=0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: info
+
+ - name: 'Low number of etcd watchers on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: avg
+
+ - name: 'Etcd ping failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+ priority: high
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index aeeec4b8d..fbc20cd63 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -194,6 +194,11 @@ g_template_os_linux:
lifetime: 1
description: "Dynamically register the filesystems"
+ - name: disc.disk
+ key: disc.disk
+ lifetime: 1
+ description: "Dynamically register disks on a node"
+
zitemprototypes:
- discoveryrule_key: disc.filesys
name: "disc.filesys.full.{#OSO_FILESYS}"
@@ -211,6 +216,22 @@ g_template_os_linux:
applications:
- Disk
+ - discoveryrule_key: disc.disk
+ name: "TPS (IOPS) for disk {#OSO_DISK}"
+ key: "disc.disk.tps[{#OSO_DISK}]"
+ value_type: int
+ description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using"
+ applications:
+ - Disk
+
+ - discoveryrule_key: disc.disk
+ name: "Percent Utilized for disk {#OSO_DISK}"
+ key: "disc.disk.putil[{#OSO_DISK}]"
+ value_type: float
+ description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command"
+ applications:
+ - Disk
+
ztriggerprototypes:
- name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml
new file mode 100644
index 000000000..b62fa0228
--- /dev/null
+++ b/roles/os_zabbix/vars/template_performance_copilot.yml
@@ -0,0 +1,14 @@
+---
+g_template_performance_copilot:
+ name: Template Performance Copilot
+ zitems:
+ - key: pcp.ping
+ applications:
+ - Performance Copilot
+ value_type: int
+
+ ztriggers:
+ - name: 'pcp.ping failed on {HOST.NAME}'
+ expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc'
+ priority: average