summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix/vars/template_os_linux.yml
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix/vars/template_os_linux.yml')
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml134
1 files changed, 112 insertions, 22 deletions
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 3173c79b2..c6e557f12 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -10,17 +10,20 @@ g_template_os_linux:
- key: kernel.all.cpu.wait.total
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.all.cpu.irq.hard
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.all.cpu.idle
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.uname.distro
applications:
@@ -35,7 +38,8 @@ g_template_os_linux:
- key: kernel.all.cpu.irq.soft
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.all.load.15_minute
applications:
@@ -45,7 +49,8 @@ g_template_os_linux:
- key: kernel.all.cpu.sys
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.all.load.5_minute
applications:
@@ -55,7 +60,8 @@ g_template_os_linux:
- key: kernel.all.cpu.nice
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.all.load.1_minute
applications:
@@ -75,7 +81,8 @@ g_template_os_linux:
- key: kernel.all.cpu.user
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.uname.machine
applications:
@@ -90,7 +97,8 @@ g_template_os_linux:
- key: kernel.all.cpu.steal
applications:
- Kernel
- value_type: int
+ value_type: float
+ units: '%'
- key: kernel.all.pswitch
applications:
@@ -180,38 +188,105 @@ g_template_os_linux:
multiplier: 1024
units: B
- # Disk items
- - key: filesys.full.xvda2
+ zdiscoveryrules:
+ - name: disc.filesys
+ key: disc.filesys
+ lifetime: 1
+ description: "Dynamically register the filesystems"
+
+ - name: disc.disk
+ key: disc.disk
+ lifetime: 1
+ description: "Dynamically register disks on a node"
+
+ - name: disc.network
+ key: disc.network
+ lifetime: 1
+ description: "Dynamically register network interfaces on a node"
+
+ zitemprototypes:
+ - discoveryrule_key: disc.filesys
+ name: "disc.filesys.full.{#OSO_FILESYS}"
+ key: "disc.filesys.full[{#OSO_FILESYS}]"
+ value_type: float
+ description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full"
applications:
- Disk
+
+ - discoveryrule_key: disc.filesys
+ name: "Percentage of used inodes on {#OSO_FILESYS}"
+ key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]"
value_type: float
+ description: "PCP derived value of percentage of used inodes on a filesystem."
+ applications:
+ - Disk
- - key: filesys.full.xvda3
+ - discoveryrule_key: disc.disk
+ name: "TPS (IOPS) for disk {#OSO_DISK}"
+ key: "disc.disk.tps[{#OSO_DISK}]"
+ value_type: int
+ description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using"
applications:
- Disk
+
+ - discoveryrule_key: disc.disk
+ name: "Percent Utilized for disk {#OSO_DISK}"
+ key: "disc.disk.putil[{#OSO_DISK}]"
value_type: float
+ description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command"
+ applications:
+ - Disk
- ztriggers:
- - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: warn
+ - discoveryrule_key: disc.network
+ name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}"
+ key: "disc.network.in.bytes[{#OSO_NET_INTERFACE}]"
+ value_type: int
+ units: B
+ delta: 1
+ description: "PCP network.interface.in.bytes metric. This is setup as a delta in Zabbix to measure the speed per second"
+ applications:
+ - Network
- - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
+ - discoveryrule_key: disc.network
+ name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}"
+ key: "disc.network.out.bytes[{#OSO_NET_INTERFACE}]"
+ value_type: int
+ units: B
+ delta: 1
+ description: "PCP network.interface.out.bytes metric. This is setup as a delta in Zabbix to measure the speed per second"
+ applications:
+ - Network
+
+ ztriggerprototypes:
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
- - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
+ # This has a dependency on the previous trigger
+ # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: warn
+ dependencies:
+ - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
- - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
+ # This has a dependency on the previous trigger
+ # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+ priority: warn
+ dependencies:
+ - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
+
+ ztriggers:
- name: 'Too many TOTAL processes on {HOST.NAME}'
expression: '{Template OS Linux:proc.nprocs.last()}>5000'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
@@ -222,3 +297,18 @@ g_template_os_linux:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
priority: warn
description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'
+
+ # CPU Utilization #
+ - name: 'CPU idle less than 5% on {HOST.NAME}'
+ expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
+ priority: average
+ description: 'CPU is less than 5% idle'
+
+ - name: 'CPU idle less than 10% on {HOST.NAME}'
+ expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
+ priority: average
+ description: 'CPU is less than 10% idle'
+ dependencies:
+ - 'CPU idle less than 5% on {HOST.NAME}'