---
# Zabbix template definition "Template OS Linux".
#
# Sections, in file order:
#   zitems             - static items, grouped by application (Kernel, Memory)
#   zdiscoveryrules    - discovery rules (filesystems, disks, network interfaces)
#   zitemprototypes    - item prototypes, each bound to a rule via discoveryrule_key
#   ztriggerprototypes - trigger prototypes for discovered filesystems
#   ztriggers          - static triggers (process count, memory, CPU idle)
g_template_os_linux:
  name: Template OS Linux

  zitems:
    # Kernel items
    - key: kernel.uname.sysname
      applications:
        - Kernel
      value_type: string

    - key: kernel.all.cpu.wait.total
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.cpu.irq.hard
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.cpu.idle
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.uname.distro
      applications:
        - Kernel
      value_type: string

    - key: kernel.uname.nodename
      applications:
        - Kernel
      value_type: string

    - key: kernel.all.cpu.irq.soft
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.load.15_minute
      applications:
        - Kernel
      value_type: float

    - key: kernel.all.cpu.sys
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.load.5_minute
      applications:
        - Kernel
      value_type: float

    - key: kernel.all.cpu.nice
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.load.1_minute
      applications:
        - Kernel
      value_type: float

    - key: kernel.uname.version
      applications:
        - Kernel
      value_type: string

    - key: kernel.all.uptime
      applications:
        - Kernel
      value_type: int

    - key: kernel.all.cpu.user
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.uname.machine
      applications:
        - Kernel
      value_type: string

    - key: hinv.ncpu
      applications:
        - Kernel
      value_type: int

    - key: kernel.all.cpu.steal
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.pswitch
      applications:
        - Kernel
      value_type: int

    - key: kernel.uname.release
      applications:
        - Kernel
      value_type: string

    - key: proc.nprocs
      applications:
        - Kernel
      value_type: int

    # Memory items
    # Each item carries `multiplier: 1024` and `units: B`.
    # NOTE(review): presumably the raw PCP values are in KiB - confirm.
    - key: mem.freemem
      applications:
        - Memory
      value_type: int
      description: 'PCP: free system memory metric from /proc/meminfo'
      multiplier: 1024
      units: B

    - key: mem.util.bufmem
      applications:
        - Memory
      value_type: int
      description: >-
        PCP: Memory allocated for buffer_heads.; I/O buffers metric from
        /proc/meminfo
      multiplier: 1024
      units: B

    - key: swap.used
      applications:
        - Memory
      value_type: int
      description: 'PCP: swap used metric from /proc/meminfo'
      multiplier: 1024
      units: B

    - key: swap.length
      applications:
        - Memory
      value_type: int
      description: 'PCP: total swap available metric from /proc/meminfo'
      multiplier: 1024
      units: B

    - key: mem.physmem
      applications:
        - Memory
      value_type: int
      description: >-
        PCP: The value of this metric corresponds to the "MemTotal" field
        reported by /proc/meminfo. Note that this does not necessarily
        correspond to actual installed physical memory - there may be areas
        of the physical address space mapped as ROM in various peripheral
        devices and the bios may be mirroring certain ROMs in RAM.
      multiplier: 1024
      units: B

    - key: swap.free
      applications:
        - Memory
      value_type: int
      description: 'PCP: swap free metric from /proc/meminfo'
      multiplier: 1024
      units: B

    - key: mem.util.available
      applications:
        - Memory
      value_type: int
      description: >-
        PCP: The amount of memory that is available for a new workload,
        without pushing the system into swap. Estimated from MemFree,
        Active(file), Inactive(file), and SReclaimable, as well as the "low"
        watermarks from /proc/zoneinfo.; available memory from /proc/meminfo
      multiplier: 1024
      units: B

    - key: mem.util.used
      applications:
        - Memory
      value_type: int
      description: >-
        PCP: Used memory is the difference between mem.physmem and
        mem.freemem; used memory metric from /proc/meminfo
      multiplier: 1024
      units: B

    - key: mem.util.cached
      applications:
        - Memory
      value_type: int
      description: >-
        PCP: Memory used by the page cache, including buffered file data.
        This is in-memory cache for files read from the disk (the pagecache)
        but doesn't include SwapCached.; page cache metric from /proc/meminfo
      multiplier: 1024
      units: B

  # Discovery rules; the item prototypes below refer to these by `key`.
  zdiscoveryrules:
    - name: disc.filesys
      key: disc.filesys
      lifetime: 1
      description: 'Dynamically register the filesystems'

    - name: disc.disk
      key: disc.disk
      lifetime: 1
      description: 'Dynamically register disks on a node'

    - name: disc.network
      key: disc.network
      lifetime: 1
      description: 'Dynamically register network interfaces on a node'

  # Item prototypes; `discoveryrule_key` names the owning discovery rule and
  # the {#OSO_*} macros appear in both the item name and key.
  zitemprototypes:
    # Filesystem prototypes
    - discoveryrule_key: disc.filesys
      name: 'disc.filesys.full.{#OSO_FILESYS}'
      key: 'disc.filesys.full[{#OSO_FILESYS}]'
      value_type: float
      description: >-
        PCP filesys.full option. This is the percent full returned from pcp
        filesys.full
      applications:
        - Disk

    - discoveryrule_key: disc.filesys
      name: 'Percentage of used inodes on {#OSO_FILESYS}'
      key: 'disc.filesys.inodes.pused[{#OSO_FILESYS}]'
      value_type: float
      description: >-
        PCP derived value of percentage of used inodes on a filesystem.
      applications:
        - Disk

    # Disk prototypes
    - discoveryrule_key: disc.disk
      name: 'TPS (IOPS) for disk {#OSO_DISK}'
      key: 'disc.disk.tps[{#OSO_DISK}]'
      value_type: int
      description: >-
        PCP disk.dev.totals metric measured over a period of time. This
        shows how many disk transactions per second the disk is using
      applications:
        - Disk

    - discoveryrule_key: disc.disk
      name: 'Percent Utilized for disk {#OSO_DISK}'
      key: 'disc.disk.putil[{#OSO_DISK}]'
      value_type: float
      description: >-
        PCP disk.dev.avactive metric measured over a period of time. This is
        the '%util' in the iostat command
      applications:
        - Disk

    # Network prototypes (stored with `delta: 1`, see the descriptions)
    - discoveryrule_key: disc.network
      name: 'Bytes per second IN on network interface {#OSO_NET_INTERFACE}'
      key: 'disc.network.in.bytes[{#OSO_NET_INTERFACE}]'
      value_type: int
      units: B
      delta: 1
      description: >-
        PCP network.interface.in.bytes metric. This is setup as a delta in
        Zabbix to measure the speed per second
      applications:
        - Network

    - discoveryrule_key: disc.network
      name: 'Bytes per second OUT on network interface {#OSO_NET_INTERFACE}'
      key: 'disc.network.out.bytes[{#OSO_NET_INTERFACE}]'
      value_type: int
      units: B
      delta: 1
      description: >-
        PCP network.interface.out.bytes metric. This is setup as a delta in
        Zabbix to measure the speed per second
      applications:
        - Network

  # Trigger prototypes for discovered filesystems. Each `dependencies` entry
  # must match the `name` of the trigger it depends on exactly.
  ztriggerprototypes:
    - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
      expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high

    # Depends on the preceding trigger.
    # Trigger prototypes do not work in Zabbix 2.4; they will work in
    # Zabbix 3.0.
    - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
      expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn
      dependencies:
        - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'

    - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
      expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high

    # Depends on the preceding trigger.
    # Trigger prototypes do not work in Zabbix 2.4; they will work in
    # Zabbix 3.0.
    - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
      expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn
      dependencies:
        - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'

  ztriggers:
    - name: 'Too many TOTAL processes on {HOST.NAME}'
      expression: '{Template OS Linux:proc.nprocs.last()}>5000'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
      priority: warn

    # Threshold 30720000 = 30000 x 1024, as stated in the description.
    - name: 'Lack of available memory on {HOST.NAME}'
      expression: '{Template OS Linux:mem.freemem.last()}<30720000'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
      priority: warn
      description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'

    # CPU utilization
    # Both triggers evaluate max(#5) of kernel.all.cpu.idle; the 10% trigger
    # depends on the 5% trigger.
    - name: 'CPU idle less than 5% on {HOST.NAME}'
      expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
      priority: average
      description: 'CPU is less than 5% idle'

    - name: 'CPU idle less than 10% on {HOST.NAME}'
      expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
      priority: average
      description: 'CPU is less than 10% idle'
      dependencies:
        - 'CPU idle less than 5% on {HOST.NAME}'