diff options
| author | Matt Woodson <mwoodson@gmail.com> | 2015-09-10 11:33:33 -0400 | 
|---|---|---|
| committer | Matt Woodson <mwoodson@gmail.com> | 2015-09-10 11:33:33 -0400 | 
| commit | 40c6cccf492c787421b4438392804fe188fbc58b (patch) | |
| tree | d346e83bebb694efe7bb3db30cbd7e2c4580d681 | |
| parent | 105fcbc28f96c9283b4ff4da3a6dfb8f671e38e1 (diff) | |
| parent | 2f0bbb5781b270d88bccd5fcbe90723f9b3a5930 (diff) | |
Merge pull request #574 from openshift/mem_items
added more support for zabbix
| -rw-r--r-- | roles/os_zabbix/vars/template_docker.yml | 12 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_heartbeat.yml | 2 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 2 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_os_linux.yml | 122 | 
4 files changed, 81 insertions, 57 deletions
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index a1cd3519e..395e054de 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -52,35 +52,35 @@ g_template_docker:      - Docker Storage      value_type: float    ztriggers: -  - description: 'docker.ping failed on {HOST.NAME}' +  - name: 'docker.ping failed on {HOST.NAME}'      expression: '{Template Docker:docker.ping.max(#3)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'      priority: high -  - description: 'Docker storage is using LOOPBACK on {HOST.NAME}' +  - name: 'Docker storage is using LOOPBACK on {HOST.NAME}'      expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'      priority: high -  - description: 'Critically low docker storage data space on {HOST.NAME}' +  - name: 'Critically low docker storage data space on {HOST.NAME}'      expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'      priority: high -  - description: 'Critically low docker storage metadata space on {HOST.NAME}' +  - name: 'Critically low docker storage metadata space on {HOST.NAME}'      expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'      priority: high    # Put triggers that depend on other triggers here (deps must be created first) -  - description: 'Low docker storage data space on {HOST.NAME}' +  - name: 'Low docker storage data space on {HOST.NAME}'      expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'      dependencies:      - 'Critically low docker storage data space on {HOST.NAME}'      priority: average -  - description: 'Low docker storage metadata space on {HOST.NAME}' +  - name: 'Low docker storage metadata space on {HOST.NAME}'      expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'      dependencies: diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml index 798377cd9..8dbe0d0d6 100644 --- a/roles/os_zabbix/vars/template_heartbeat.yml +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -7,7 +7,7 @@ g_template_heartbeat:      - Heartbeat      key: heartbeat.ping    ztriggers: -  - description: 'Heartbeat.ping has failed on {HOST.NAME}' +  - name: 'Heartbeat.ping has failed on {HOST.NAME}'      expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1'      priority: avg      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index d2c1365b0..728423ac1 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -7,7 +7,7 @@ g_template_openshift_master:      - Openshift Master      key: create_app    ztriggers: -  - description: 'Application creation has failed on {HOST.NAME}' +  - name: 'Application creation has failed on {HOST.NAME}'      expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'      priority: avg diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index fad6af807..3173c79b2 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -52,112 +52,135 @@ g_template_os_linux:      - Kernel      value_type: float -  - key: mem.freemem +  - key: kernel.all.cpu.nice      applications: -    - Memory +    - Kernel      value_type: int -  - key: kernel.all.cpu.nice +  - key: kernel.all.load.1_minute      applications:      - Kernel -    value_type: int +    value_type: float -  - key: mem.util.bufmem +  - key: kernel.uname.version      applications: -    - Memory -    value_type: int +    - Kernel +    value_type: string -  - key: swap.used +  - key: kernel.all.uptime      applications: -    - Memory +    - Kernel      value_type: int -  - key: kernel.all.load.1_minute +  - key: kernel.all.cpu.user      applications:      - Kernel -    value_type: float +    value_type: int -  - key: kernel.uname.version +  - key: kernel.uname.machine      applications:      - Kernel      value_type: string -  - key: swap.length +  - key: hinv.ncpu      applications: -    - Memory +    - Kernel      value_type: int -  - key: mem.physmem +  - key: kernel.all.cpu.steal      applications: -    - Memory +    - Kernel      value_type: int -  - key: kernel.all.uptime +  - key: kernel.all.pswitch      applications:      - Kernel      value_type: int -  - key: swap.free +  - key: kernel.uname.release      applications: -    - Memory -    value_type: int +    - Kernel +    value_type: string -  - key: mem.util.available +  - key: proc.nprocs      applications: -    - Memory +    - Kernel      value_type: int -  - key: mem.util.used +  # Memory Items +  - key: mem.freemem      applications:      - Memory      value_type: int -    description: used memory +    description: "PCP: free system memory metric from /proc/meminfo"      multiplier: 1024      units: B -  - key: kernel.all.cpu.user +  - key: mem.util.bufmem      applications: -    - Kernel +    - Memory      value_type: int +    description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo" +    multiplier: 1024 +    units: B -  - key: kernel.uname.machine +  - key: swap.used      applications: -    - Kernel -    value_type: string +    - Memory +    value_type: int +    description: "PCP: swap used metric from /proc/meminfo" +    multiplier: 1024 +    units: B -  - key: hinv.ncpu +  - key: swap.length      applications: -    - Kernel +    - Memory      value_type: int +    description: "PCP: total swap available metric from /proc/meminfo" +    multiplier: 1024 +    units: B -  - key: mem.util.cached +  - key: mem.physmem      applications:      - Memory      value_type: int -    description: cached memory +    description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."      multiplier: 1024      units: B -  - key: kernel.all.cpu.steal +  - key: swap.free      applications: -    - Kernel +    - Memory      value_type: int +    description: "PCP: swap free metric from /proc/meminfo" +    multiplier: 1024 +    units: B -  - key: kernel.all.pswitch +  - key: mem.util.available      applications: -    - Kernel +    - Memory      value_type: int +    description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo" +    multiplier: 1024 +    units: B -  - key: kernel.uname.release +  - key: mem.util.used      applications: -    - Kernel -    value_type: string +    - Memory +    value_type: int +    description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo" +    multiplier: 1024 +    units: B -  - key: proc.nprocs +  - key: mem.util.cached      applications: -    - Kernel +    - Memory      value_type: int +    description: "PCP: Memory used by the page cache, including buffered file data.  This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo" +    multiplier: 1024 +    units: B +  # Disk items    - key: filesys.full.xvda2      applications:      - Disk @@ -169,32 +192,33 @@ g_template_os_linux:      value_type: float    ztriggers: -  - description: 'Filesystem: / has less than 10% free on {HOST.NAME}' +  - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'      expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: warn -  - description: 'Filesystem: / has less than 5% free on {HOST.NAME}' +  - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'      expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: high -  - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}' +  - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'      expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: warn -  - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' +  - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'      expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: high -  - description: 'Too many TOTAL processes on {HOST.NAME}' +  - name: 'Too many TOTAL processes on {HOST.NAME}'      expression: '{Template OS Linux:proc.nprocs.last()}>5000'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'      priority: warn -  - description: 'Lack of available memory on {HOST.NAME}' -    expression: '{Template OS Linux:mem.freemem.last()}<3000' +  - name: 'Lack of available memory on {HOST.NAME}' +    expression: '{Template OS Linux:mem.freemem.last()}<30720000'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'      priority: warn +    description: 'Alert on less than 30MegaBytes.  This is 30 Million Bytes.  30000 KB x 1024'  | 
