summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix/vars/template_os_linux.yml
diff options
context:
space:
mode:
Diffstat (limited to 'roles/os_zabbix/vars/template_os_linux.yml')
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml260
1 files changed, 260 insertions, 0 deletions
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
new file mode 100644
index 000000000..3ae1500bc
--- /dev/null
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -0,0 +1,260 @@
+---
+g_template_os_linux:
+ name: Template OS Linux
+ zitems:
+ - key: kernel.uname.sysname
+ applications:
+ - Kernel
+ value_type: string
+
+ - key: kernel.all.cpu.wait.total
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.all.cpu.irq.hard
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.all.cpu.idle
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.uname.distro
+ applications:
+ - Kernel
+ value_type: string
+
+ - key: kernel.uname.nodename
+ applications:
+ - Kernel
+ value_type: string
+
+ - key: kernel.all.cpu.irq.soft
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.all.load.15_minute
+ applications:
+ - Kernel
+ value_type: float
+
+ - key: kernel.all.cpu.sys
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.all.load.5_minute
+ applications:
+ - Kernel
+ value_type: float
+
+ - key: kernel.all.cpu.nice
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.all.load.1_minute
+ applications:
+ - Kernel
+ value_type: float
+
+ - key: kernel.uname.version
+ applications:
+ - Kernel
+ value_type: string
+
+ - key: kernel.all.uptime
+ applications:
+ - Kernel
+ value_type: int
+
+ - key: kernel.all.cpu.user
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.uname.machine
+ applications:
+ - Kernel
+ value_type: string
+
+ - key: hinv.ncpu
+ applications:
+ - Kernel
+ value_type: int
+
+ - key: kernel.all.cpu.steal
+ applications:
+ - Kernel
+ value_type: float
+ units: '%'
+
+ - key: kernel.all.pswitch
+ applications:
+ - Kernel
+ value_type: int
+
+ - key: kernel.uname.release
+ applications:
+ - Kernel
+ value_type: string
+
+ - key: proc.nprocs
+ applications:
+ - Kernel
+ value_type: int
+
+ # Memory Items
+ - key: mem.freemem
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: free system memory metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: mem.util.bufmem
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: swap.used
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: swap used metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: swap.length
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: total swap available metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: mem.physmem
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."
+ multiplier: 1024
+ units: B
+
+ - key: swap.free
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: swap free metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: mem.util.available
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: mem.util.used
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ - key: mem.util.cached
+ applications:
+ - Memory
+ value_type: int
+ description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+
+ zdiscoveryrules:
+ - name: disc.filesys
+ key: disc.filesys
+ lifetime: 1
+ description: "Dynamically register the filesystems"
+
+ zitemprototypes:
+ - discoveryrule_key: disc.filesys
+ name: "disc.filesys.full.{#OSO_FILESYS}"
+ key: "disc.filesys.full[{#OSO_FILESYS}]"
+ value_type: float
+ description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full"
+ applications:
+ - Disk
+
+ - discoveryrule_key: disc.filesys
+ name: "Percentage of used inodes on {#OSO_FILESYS}"
+ key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]"
+ value_type: float
+ description: "PCP derived value of percentage of used inodes on a filesystem."
+ applications:
+ - Disk
+
+ ztriggerprototypes:
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+ priority: warn
+
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+ priority: high
+
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+ priority: warn
+
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+ priority: high
+
+ ztriggers:
+ - name: 'Too many TOTAL processes on {HOST.NAME}'
+ expression: '{Template OS Linux:proc.nprocs.last()}>5000'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
+ priority: warn
+
+ - name: 'Lack of available memory on {HOST.NAME}'
+ expression: '{Template OS Linux:mem.freemem.last()}<30720000'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
+ priority: warn
+ description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'
+
+ # CPU Utilization #
+ - name: 'CPU idle less than 5% on {HOST.NAME}'
+ expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
+ priority: average
+ description: 'CPU is less than 5% idle'
+
+ - name: 'CPU idle less than 10% on {HOST.NAME}'
+ expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
+ priority: warn
+ description: 'CPU is less than 10% idle'
+ dependencies:
+ - 'CPU idle less than 5% on {HOST.NAME}'