summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/lib_zabbix/library/zbx_item.py33
-rw-r--r--roles/lib_zabbix/library/zbx_trigger.py8
-rw-r--r--roles/lib_zabbix/tasks/create_template.yml8
-rw-r--r--roles/os_zabbix/vars/template_docker.yml12
-rw-r--r--roles/os_zabbix/vars/template_heartbeat.yml2
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml2
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml124
7 files changed, 127 insertions, 62 deletions
diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py
index 388db31b9..11e3c7b2b 100644
--- a/roles/lib_zabbix/library/zbx_item.py
+++ b/roles/lib_zabbix/library/zbx_item.py
@@ -88,6 +88,23 @@ def get_template_id(zapi, template_name):
return template_ids, app_ids
+def get_multiplier(inval):
+ ''' Determine the multiplier
+ '''
+ if inval == None or inval == '':
+ return None, None
+
+ rval = None
+ try:
+ rval = int(inval)
+ except ValueError:
+ pass
+
+ if rval:
+ return rval, True
+
+ return rval, False
+
# The branches are needed for CRUD and error handling
# pylint: disable=too-many-branches
def main():
@@ -106,6 +123,9 @@ def main():
template_name=dict(default=None, type='str'),
zabbix_type=dict(default=2, type='int'),
value_type=dict(default='int', type='str'),
+ multiplier=dict(default=None, type='str'),
+ description=dict(default=None, type='str'),
+ units=dict(default=None, type='str'),
applications=dict(default=None, type='list'),
state=dict(default='present', type='str'),
),
@@ -137,11 +157,15 @@ def main():
'templateids': templateid,
})
- # Get
+ #******#
+ # GET
+ #******#
if state == 'list':
module.exit_json(changed=False, results=content['result'], state="list")
- # Delete
+ #******#
+ # DELETE
+ #******#
if state == 'absent':
if not exists(content):
module.exit_json(changed=False, state="absent")
@@ -152,12 +176,17 @@ def main():
# Create and Update
if state == 'present':
+ formula, use_multiplier = get_multiplier(module.params['multiplier'])
params = {'name': module.params.get('name', module.params['key']),
'key_': module.params['key'],
'hostid': templateid[0],
'type': module.params['zabbix_type'],
'value_type': get_value_type(module.params['value_type']),
'applications': get_app_ids(module.params['applications'], app_name_ids),
+ 'formula': formula,
+ 'multiplier': use_multiplier,
+ 'description': module.params['description'],
+ 'units': module.params['units'],
}
# Remove any None valued params
diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py
index c384f6fa3..a05de7e68 100644
--- a/roles/lib_zabbix/library/zbx_trigger.py
+++ b/roles/lib_zabbix/library/zbx_trigger.py
@@ -98,6 +98,7 @@ def main():
zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'),
zbx_debug=dict(default=False, type='bool'),
expression=dict(default=None, type='str'),
+ name=dict(default=None, type='str'),
description=dict(default=None, type='str'),
dependencies=dict(default=[], type='list'),
priority=dict(default='avg', type='str'),
@@ -116,11 +117,11 @@ def main():
zbx_class_name = 'trigger'
idname = "triggerid"
state = module.params['state']
- description = module.params['description']
+ tname = module.params['name']
content = zapi.get_content(zbx_class_name,
'get',
- {'filter': {'description': description},
+ {'filter': {'description': tname},
'expandExpression': True,
'selectDependencies': 'triggerid',
})
@@ -138,7 +139,8 @@ def main():
# Create and Update
if state == 'present':
- params = {'description': description,
+ params = {'description': tname,
+ 'comments': module.params['description'],
'expression': module.params['expression'],
'dependencies': get_deps(zapi, module.params['dependencies']),
'priority': get_priority(module.params['priority']),
diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml
index bc9aff997..fd0cdd46f 100644
--- a/roles/lib_zabbix/tasks/create_template.yml
+++ b/roles/lib_zabbix/tasks/create_template.yml
@@ -30,6 +30,9 @@
key: "{{ item.key }}"
name: "{{ item.name | default(item.key, true) }}"
value_type: "{{ item.value_type | default('int') }}"
+ description: "{{ item.description | default('', True) }}"
+ multiplier: "{{ item.multiplier | default('', True) }}"
+ units: "{{ item.units | default('', True) }}"
template_name: "{{ template.name }}"
applications: "{{ item.applications }}"
with_items: template.zitems
@@ -41,8 +44,9 @@
zbx_server: "{{ server }}"
zbx_user: "{{ user }}"
zbx_password: "{{ password }}"
- description: "{{ item.description }}"
- dependencies: "{{ item.dependencies | default([], true) }}"
+ name: "{{ item.name }}"
+ description: "{{ item.description | default('', True) }}"
+ dependencies: "{{ item.dependencies | default([], True) }}"
expression: "{{ item.expression }}"
priority: "{{ item.priority }}"
url: "{{ item.url | default(None, True) }}"
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index a1cd3519e..395e054de 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -52,35 +52,35 @@ g_template_docker:
- Docker Storage
value_type: float
ztriggers:
- - description: 'docker.ping failed on {HOST.NAME}'
+ - name: 'docker.ping failed on {HOST.NAME}'
expression: '{Template Docker:docker.ping.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
priority: high
- - description: 'Docker storage is using LOOPBACK on {HOST.NAME}'
+ - name: 'Docker storage is using LOOPBACK on {HOST.NAME}'
expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'
priority: high
- - description: 'Critically low docker storage data space on {HOST.NAME}'
+ - name: 'Critically low docker storage data space on {HOST.NAME}'
expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'
priority: high
- - description: 'Critically low docker storage metadata space on {HOST.NAME}'
+ - name: 'Critically low docker storage metadata space on {HOST.NAME}'
expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'
priority: high
# Put triggers that depend on other triggers here (deps must be created first)
- - description: 'Low docker storage data space on {HOST.NAME}'
+ - name: 'Low docker storage data space on {HOST.NAME}'
expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'
dependencies:
- 'Critically low docker storage data space on {HOST.NAME}'
priority: average
- - description: 'Low docker storage metadata space on {HOST.NAME}'
+ - name: 'Low docker storage metadata space on {HOST.NAME}'
expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc'
dependencies:
diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml
index 798377cd9..8dbe0d0d6 100644
--- a/roles/os_zabbix/vars/template_heartbeat.yml
+++ b/roles/os_zabbix/vars/template_heartbeat.yml
@@ -7,7 +7,7 @@ g_template_heartbeat:
- Heartbeat
key: heartbeat.ping
ztriggers:
- - description: 'Heartbeat.ping has failed on {HOST.NAME}'
+ - name: 'Heartbeat.ping has failed on {HOST.NAME}'
expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1'
priority: avg
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc'
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index d2c1365b0..728423ac1 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -7,7 +7,7 @@ g_template_openshift_master:
- Openshift Master
key: create_app
ztriggers:
- - description: 'Application creation has failed on {HOST.NAME}'
+ - name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
priority: avg
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 7c446cd85..3173c79b2 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -52,106 +52,135 @@ g_template_os_linux:
- Kernel
value_type: float
- - key: mem.freemem
+ - key: kernel.all.cpu.nice
applications:
- - Memory
+ - Kernel
value_type: int
- - key: kernel.all.cpu.nice
+ - key: kernel.all.load.1_minute
applications:
- Kernel
- value_type: int
+ value_type: float
- - key: mem.util.bufmem
+ - key: kernel.uname.version
applications:
- - Memory
- value_type: int
+ - Kernel
+ value_type: string
- - key: swap.used
+ - key: kernel.all.uptime
applications:
- - Memory
+ - Kernel
value_type: int
- - key: kernel.all.load.1_minute
+ - key: kernel.all.cpu.user
applications:
- Kernel
- value_type: float
+ value_type: int
- - key: kernel.uname.version
+ - key: kernel.uname.machine
applications:
- Kernel
value_type: string
- - key: swap.length
+ - key: hinv.ncpu
applications:
- - Memory
+ - Kernel
value_type: int
- - key: mem.physmem
+ - key: kernel.all.cpu.steal
applications:
- - Memory
+ - Kernel
value_type: int
- - key: kernel.all.uptime
+ - key: kernel.all.pswitch
applications:
- Kernel
value_type: int
- - key: swap.free
+ - key: kernel.uname.release
applications:
- - Memory
- value_type: int
+ - Kernel
+ value_type: string
- - key: mem.util.available
+ - key: proc.nprocs
applications:
- - Memory
+ - Kernel
value_type: int
- - key: mem.util.used
+ # Memory Items
+ - key: mem.freemem
applications:
- Memory
value_type: int
+ description: "PCP: free system memory metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: kernel.all.cpu.user
+ - key: mem.util.bufmem
applications:
- - Kernel
+ - Memory
value_type: int
+ description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: kernel.uname.machine
+ - key: swap.used
applications:
- - Kernel
- value_type: string
+ - Memory
+ value_type: int
+ description: "PCP: swap used metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: hinv.ncpu
+ - key: swap.length
applications:
- - Kernel
+ - Memory
value_type: int
+ description: "PCP: total swap available metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: mem.util.cached
+ - key: mem.physmem
applications:
- Memory
value_type: int
+ description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."
+ multiplier: 1024
+ units: B
- - key: kernel.all.cpu.steal
+ - key: swap.free
applications:
- - Kernel
+ - Memory
value_type: int
+ description: "PCP: swap free metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: kernel.all.pswitch
+ - key: mem.util.available
applications:
- - Kernel
+ - Memory
value_type: int
+ description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: kernel.uname.release
+ - key: mem.util.used
applications:
- - Kernel
- value_type: string
+ - Memory
+ value_type: int
+ description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
- - key: proc.nprocs
+ - key: mem.util.cached
applications:
- - Kernel
+ - Memory
value_type: int
+ description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo"
+ multiplier: 1024
+ units: B
+ # Disk items
- key: filesys.full.xvda2
applications:
- Disk
@@ -163,32 +192,33 @@ g_template_os_linux:
value_type: float
ztriggers:
- - description: 'Filesystem: / has less than 10% free on {HOST.NAME}'
+ - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: warn
- - description: 'Filesystem: / has less than 5% free on {HOST.NAME}'
+ - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
- - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
+ - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: warn
- - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
+ - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
- - description: 'Too many TOTAL processes on {HOST.NAME}'
+ - name: 'Too many TOTAL processes on {HOST.NAME}'
expression: '{Template OS Linux:proc.nprocs.last()}>5000'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
priority: warn
- - description: 'Lack of available memory on {HOST.NAME}'
- expression: '{Template OS Linux:mem.freemem.last()}<3000'
+ - name: 'Lack of available memory on {HOST.NAME}'
+ expression: '{Template OS Linux:mem.freemem.last()}<30720000'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
priority: warn
+ description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'