summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README_libvirt.md3
-rw-r--r--playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml11
-rw-r--r--roles/lib_zabbix/library/zbx_item.py2
-rw-r--r--roles/lib_zabbix/library/zbx_itemprototype.py6
-rw-r--r--roles/lib_zabbix/library/zbx_trigger.py24
-rw-r--r--roles/lib_zabbix/library/zbx_user_media.py3
-rwxr-xr-xroles/openshift_facts/library/openshift_facts.py8
-rw-r--r--roles/openshift_manage_node/tasks/main.yml6
-rw-r--r--roles/openshift_node/templates/node.yaml.v1.j22
-rw-r--r--roles/os_zabbix/tasks/main.yml18
-rw-r--r--roles/os_zabbix/vars/template_app_zabbix_agent.yml23
-rw-r--r--roles/os_zabbix/vars/template_app_zabbix_server.yml408
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml18
-rw-r--r--roles/os_zabbix/vars/template_openshift_node.yml28
-rw-r--r--roles/os_zabbix/vars/template_os_linux.yml40
15 files changed, 540 insertions, 60 deletions
diff --git a/README_libvirt.md b/README_libvirt.md
index 1a710ff3b..3f8bbb5f0 100644
--- a/README_libvirt.md
+++ b/README_libvirt.md
@@ -94,7 +94,8 @@ dns=dnsmasq
- Configure dnsmasq to use the Virtual Network router for example.com:
```sh
-sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf server=/example.com/192.168.55.1
+sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf
+server=/example.com/192.168.55.1
```
Test The Setup
diff --git a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml
index c9ae923bb..b6a2d2f26 100644
--- a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml
+++ b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml
@@ -27,9 +27,8 @@
gather_facts: no
vars:
- cli_volume_type: io1
+ cli_volume_type: gp2
cli_volume_size: 30
- cli_volume_iops: "{{ 30 * cli_volume_size }}"
pre_tasks:
- fail:
@@ -104,7 +103,6 @@
volume_size: "{{ cli_volume_size | default(30, True)}}"
volume_type: "{{ cli_volume_type }}"
device_name: /dev/xvdb
- iops: "{{ 30 * cli_volume_size }}"
register: vol
- debug: var=vol
@@ -142,10 +140,3 @@
- debug: var=dockerstart
- - name: Wait for docker to stabilize
- pause:
- seconds: 30
-
- # leaving off the '-t' for docker exec. With it, it doesn't work with ansible and tty support
- - name: update zabbix docker items
- command: docker exec -i oso-rhel7-zagg-client /usr/local/bin/cron-send-docker-metrics.py
diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py
index 2ccc21292..6faa82dfc 100644
--- a/roles/lib_zabbix/library/zbx_item.py
+++ b/roles/lib_zabbix/library/zbx_item.py
@@ -53,6 +53,8 @@ def get_value_type(value_type):
vtype = 0
if 'int' in value_type:
vtype = 3
+ elif 'log' in value_type:
+ vtype = 2
elif 'char' in value_type:
vtype = 1
elif 'str' in value_type:
diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py
index 4ec1b8e02..e7fd6fa21 100644
--- a/roles/lib_zabbix/library/zbx_itemprototype.py
+++ b/roles/lib_zabbix/library/zbx_itemprototype.py
@@ -128,12 +128,12 @@ def get_status(status):
return _status
-def get_app_ids(zapi, application_names):
+def get_app_ids(zapi, application_names, templateid):
''' get application ids from names
'''
app_ids = []
for app_name in application_names:
- content = zapi.get_content('application', 'get', {'search': {'name': app_name}})
+ content = zapi.get_content('application', 'get', {'filter': {'name': app_name}, 'templateids': templateid})
if content.has_key('result'):
app_ids.append(content['result'][0]['applicationid'])
return app_ids
@@ -212,7 +212,7 @@ def main():
'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']),
'type': get_type(module.params['ztype']),
'value_type': get_value_type(module.params['value_type']),
- 'applications': get_app_ids(zapi, module.params['applications']),
+ 'applications': get_app_ids(zapi, module.params['applications'], template['templateid']),
'description': module.params['description'],
}
diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py
index 21d0fcfd2..ab7731faa 100644
--- a/roles/lib_zabbix/library/zbx_trigger.py
+++ b/roles/lib_zabbix/library/zbx_trigger.py
@@ -86,6 +86,24 @@ def get_trigger_status(inc_status):
return r_status
+def get_template_id(zapi, template_name):
+ '''
+ get related templates
+ '''
+ template_ids = []
+ app_ids = {}
+ # Fetch templates by name
+ content = zapi.get_content('template',
+ 'get',
+ {'search': {'host': template_name},
+ 'selectApplications': ['applicationid', 'name']})
+ if content.has_key('result'):
+ template_ids.append(content['result'][0]['templateid'])
+ for app in content['result'][0]['applications']:
+ app_ids[app['name']] = app['applicationid']
+
+ return template_ids, app_ids
+
def main():
'''
Create a trigger in zabbix
@@ -117,6 +135,7 @@ def main():
url=dict(default=None, type='str'),
status=dict(default=None, type='str'),
state=dict(default='present', type='str'),
+ template_name=dict(default=None, type='str'),
),
#supports_check_mode=True
)
@@ -132,11 +151,16 @@ def main():
state = module.params['state']
tname = module.params['name']
+ templateid = None
+ if module.params['template_name']:
+ templateid, _ = get_template_id(zapi, module.params['template_name'])
+
content = zapi.get_content(zbx_class_name,
'get',
{'filter': {'description': tname},
'expandExpression': True,
'selectDependencies': 'triggerid',
+ 'templateids': templateid,
})
# Get
diff --git a/roles/lib_zabbix/library/zbx_user_media.py b/roles/lib_zabbix/library/zbx_user_media.py
index 9ed838f81..8895c78c3 100644
--- a/roles/lib_zabbix/library/zbx_user_media.py
+++ b/roles/lib_zabbix/library/zbx_user_media.py
@@ -260,6 +260,9 @@ def main():
for user in params['users']:
diff['users']['userid'] = user['userid']
+ # Medias have no real unique key so therefore we need to make it like the incoming user's request
+ diff['medias'] = medias
+
# We have differences and need to update
content = zapi.get_content(zbx_class_name, 'updatemedia', diff)
diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py
index 987f7f7da..69bb49c9b 100755
--- a/roles/openshift_facts/library/openshift_facts.py
+++ b/roles/openshift_facts/library/openshift_facts.py
@@ -480,11 +480,11 @@ def set_deployment_facts_if_unset(facts):
if role in facts:
deployment_type = facts['common']['deployment_type']
if 'registry_url' not in facts[role]:
- registry_url = 'aos3/aos-${component}:${version}'
- if deployment_type in ['enterprise', 'online']:
+ registry_url = 'openshift/origin-${component}:${version}'
+ if deployment_type in ['enterprise', 'online', 'openshift-enterprise']:
registry_url = 'openshift3/ose-${component}:${version}'
- elif deployment_type == 'origin':
- registry_url = 'openshift/origin-${component}:${version}'
+ elif deployment_type == 'atomic-enterprise':
+ registry_url = 'aep3/aep-${component}:${version}'
facts[role]['registry_url'] = registry_url
return facts
diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml
index 94d7879b2..637e494ea 100644
--- a/roles/openshift_manage_node/tasks/main.yml
+++ b/roles/openshift_manage_node/tasks/main.yml
@@ -1,6 +1,6 @@
- name: Wait for Node Registration
command: >
- {{ openshift.common.client_binary }} get node {{ item }}
+ {{ openshift.common.client_binary }} get node {{ item | lower }}
register: omd_get_node
until: omd_get_node.rc == 0
retries: 20
@@ -9,13 +9,13 @@
- name: Set node schedulability
command: >
- {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }}
+ {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname | lower }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }}
with_items:
- "{{ openshift_node_vars }}"
- name: Label nodes
command: >
- {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname }} {{ item.openshift.node.labels | oo_combine_dict }}
+ {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname | lower }} {{ item.openshift.node.labels | oo_combine_dict }}
with_items:
- "{{ openshift_node_vars }}"
when: "'labels' in item.openshift.node and item.openshift.node.labels != {}"
diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2
index 07d80f99b..946c0b655 100644
--- a/roles/openshift_node/templates/node.yaml.v1.j2
+++ b/roles/openshift_node/templates/node.yaml.v1.j2
@@ -18,7 +18,7 @@ networkPluginName: {{ openshift.common.sdn_network_plugin_name }}
networkConfig:
mtu: {{ openshift.node.sdn_mtu }}
networkPluginName: {{ openshift.common.sdn_network_plugin_name }}
-nodeName: {{ openshift.common.hostname }}
+nodeName: {{ openshift.common.hostname | lower }}
podManifestConfig:
servingInfo:
bindAddress: 0.0.0.0:10250
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index 28e900255..a503b24d7 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -13,6 +13,8 @@
- include_vars: template_openshift_master.yml
- include_vars: template_openshift_node.yml
- include_vars: template_ops_tools.yml
+- include_vars: template_app_zabbix_server.yml
+- include_vars: template_app_zabbix_agent.yml
- name: Include Template Heartbeat
include: ../../lib_zabbix/tasks/create_template.yml
@@ -61,3 +63,19 @@
server: "{{ ozb_server }}"
user: "{{ ozb_user }}"
password: "{{ ozb_password }}"
+
+- name: Include Template App Zabbix Server
+ include: ../../lib_zabbix/tasks/create_template.yml
+ vars:
+ template: "{{ g_template_app_zabbix_server }}"
+ server: "{{ ozb_server }}"
+ user: "{{ ozb_user }}"
+ password: "{{ ozb_password }}"
+
+- name: Include Template App Zabbix Agent
+ include: ../../lib_zabbix/tasks/create_template.yml
+ vars:
+ template: "{{ g_template_app_zabbix_agent }}"
+ server: "{{ ozb_server }}"
+ user: "{{ ozb_user }}"
+ password: "{{ ozb_password }}"
diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml
new file mode 100644
index 000000000..06c4eda8b
--- /dev/null
+++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml
@@ -0,0 +1,23 @@
+---
+g_template_app_zabbix_agent:
+ name: Template App Zabbix Agent
+ zitems:
+ - key: agent.hostname
+ applications:
+ - Zabbix agent
+ value_type: character
+ zabbix_type: '0'
+
+ - key: agent.ping
+ applications:
+ - Zabbix agent
+ description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check.
+ value_type: int
+ zabbix_type: '0'
+
+ ztriggers:
+ - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes'
+ description: Zabbix agent is unreachable for 15 minutes.
+ expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1'
+ priority: high
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc
diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml
new file mode 100644
index 000000000..dace2aa29
--- /dev/null
+++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml
@@ -0,0 +1,408 @@
+---
+g_template_app_zabbix_server:
+ name: Template App Zabbix Server
+ zitems:
+ - key: housekeeper_creates
+ applications:
+ - Zabbix server
+ description: A simple count of the number of partition creates output by the housekeeper script.
+ units: ''
+ value_type: int
+ zabbix_type: '2'
+
+ - key: housekeeper_drops
+ applications:
+ - Zabbix server
+ description: A simple count of the number of partition drops output by the housekeeper script.
+ units: ''
+ value_type: int
+ zabbix_type: '2'
+
+ - key: housekeeper_errors
+ applications:
+ - Zabbix server
+ description: A simple count of the number of errors output by the housekeeper script.
+ units: ''
+ value_type: int
+ zabbix_type: '2'
+
+ - key: housekeeper_total
+ applications:
+ - Zabbix server
+ description: A simple count of the total number of lines output by the housekeeper
+ script.
+ units: ''
+ value_type: int
+ zabbix_type: '2'
+
+ - key: zabbix[process,alerter,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,configuration syncer,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,db watchdog,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,discoverer,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,escalator,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,history syncer,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,housekeeper,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,http poller,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,icmp pinger,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,ipmi poller,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,java poller,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,node watcher,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,poller,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,proxy poller,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,self-monitoring,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,snmp trapper,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,timer,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,trapper,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[process,unreachable poller,avg,busy]
+ applications:
+ - Zabbix server
+ description: ''
+ units: '%'
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[queue,10m]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: int
+ zabbix_type: '5'
+
+ - key: zabbix[queue]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: int
+ zabbix_type: '5'
+
+ - key: zabbix[rcache,buffer,pfree]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[wcache,history,pfree]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[wcache,text,pfree]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[wcache,trend,pfree]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: float
+ zabbix_type: '5'
+
+ - key: zabbix[wcache,values]
+ applications:
+ - Zabbix server
+ description: ''
+ units: ''
+ value_type: float
+ zabbix_type: '5'
+ ztriggers:
+ - description: "There has been unexpected output while running the housekeeping script\
+ \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\
+ \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\
+ \ for more details."
+ expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}'
+ name: Unexpected output in Zabbix DB Housekeeping
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc
+
+ - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details.
+ expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0'
+ name: Errors during Zabbix DB Housekeeping
+ priority: high
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75'
+ name: Zabbix alerter processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75'
+ name: Zabbix configuration syncer processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75'
+ name: Zabbix db watchdog processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75'
+ name: Zabbix discoverer processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75'
+ name: Zabbix escalator processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75'
+ name: Zabbix history syncer processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75'
+ name: Zabbix housekeeper processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75'
+ name: Zabbix http poller processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75'
+ name: Zabbix icmp pinger processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75'
+ name: Zabbix ipmi poller processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75'
+ name: Zabbix java poller processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75'
+ name: Zabbix node watcher processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75'
+ name: Zabbix poller processes more than 75% busy
+ priority: high
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75'
+ name: Zabbix proxy poller processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75'
+ name: Zabbix self-monitoring processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75'
+ name: Zabbix snmp trapper processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: Timer processes usually are busy because they have to process time
+ based trigger functions
+ expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75'
+ name: Zabbix timer processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75'
+ name: Zabbix trapper processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75'
+ name: Zabbix unreachable poller processes more than 75% busy
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+ - description: "This alert generally indicates a performance problem or a problem\
+ \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\
+ \ is Administration > Queue. Be sure to check the general view and the per-proxy\
+ \ view."
+ expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000'
+ name: More than 1000 items having missing data for more than 10 minutes
+ priority: high
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc
+
+ - description: Consider increasing CacheSize in the zabbix_server.conf configuration
+ file
+ expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5'
+ name: Less than 5% free in the configuration cache
+ priority: info
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25'
+ name: Less than 25% free in the history cache
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25'
+ name: Less than 25% free in the text history cache
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
+
+ - description: ''
+ expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25'
+ name: Less than 25% free in the trends cache
+ priority: avg
+ url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index c71e07910..4ae918ec6 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -13,6 +13,18 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.user.count
+ description: Shows number of users in a cluster
+ type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.pod.running.count
+ description: Shows number of pods running
+ type: int
+ applications:
+ - Openshift Master
+
ztriggers:
- name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -28,3 +40,9 @@ g_template_openshift_master:
expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: high
+
+ - name: 'Number of users for Openshift Master on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ priority: info
+
diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml
index 36f9cc4a3..ce28b1048 100644
--- a/roles/os_zabbix/vars/template_openshift_node.yml
+++ b/roles/os_zabbix/vars/template_openshift_node.yml
@@ -8,13 +8,37 @@ g_template_openshift_node:
applications:
- Openshift Node
+ - key: openshift.node.ovs.pids.count
+ description: Shows number of ovs process ids running
+ type: int
+ applications:
+ - Openshift Node
+
+ - key: openshift.node.ovs.ports.count
+ description: Shows number of OVS ports defined
+ type: int
+ applications:
+ - Openshift Node
+
ztriggers:
- name: 'Openshift Node process not running on {HOST.NAME}'
expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
priority: high
- name: 'Too many Openshift Node processes running on {HOST.NAME}'
expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
+ priority: high
+
+ - name: 'OVS may not be running on {HOST.NAME}'
+ expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last()}<>4'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
priority: high
+
+ - name: 'Number of OVS ports is 0 on {HOST.NAME}'
+ expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0'
+ url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
+ priority: high
+
+
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 70c3809bd..cd9649773 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -188,18 +188,6 @@ g_template_os_linux:
multiplier: 1024
units: B
- # Disk items
- - key: filesys.full.xvda2
- applications:
- - Disk
- value_type: float
-
- - key: filesys.full.xvda3
- applications:
- - Disk
- value_type: float
-
-
zdiscoveryrules:
- name: disc.filesys
key: disc.filesys
@@ -216,37 +204,17 @@ g_template_os_linux:
- Disk
ztriggerprototypes:
- - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: warn
- - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95'
+ - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}'
+ expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
priority: high
ztriggers:
- - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: warn
-
- - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: high
-
- - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: warn
-
- - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
- expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: high
-
- name: 'Too many TOTAL processes on {HOST.NAME}'
expression: '{Template OS Linux:proc.nprocs.last()}>5000'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'