diff options
Diffstat (limited to 'roles')
18 files changed, 741 insertions, 43 deletions
diff --git a/roles/lib_zabbix/library/zbx_httptest.py b/roles/lib_zabbix/library/zbx_httptest.py new file mode 100644 index 000000000..96733b3d1 --- /dev/null +++ b/roles/lib_zabbix/library/zbx_httptest.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python +''' + Ansible module for zabbix httpservice +''' +# vim: expandtab:tabstop=4:shiftwidth=4 +# +# Zabbix item ansible module +# +# +# Copyright 2015 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This is in place because each module looks similar to each other. +# These need duplicate code as their behavior is very similar +# but different for each zabbix class. +# pylint: disable=duplicate-code + +# pylint: disable=import-error +from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection + +def exists(content, key='result'): + ''' Check if key exists in content or the size of content[key] > 0 + ''' + if not content.has_key(key): + return False + + if not content[key]: + return False + + return True + +def get_authentication_method(auth): + ''' determine authentication type''' + rval = 0 + if 'basic' in auth: + rval = 1 + elif 'ntlm' in auth: + rval = 2 + + return rval + +def get_verify_host(verify): + ''' + get the values for verify_host + ''' + if verify: + return 1 + + return 0 + +def get_app_id(zapi, application): + ''' + get related templates + ''' + # Fetch templates by name + content = zapi.get_content('application', + 'get', + {'search': {'name': application}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + return content['result'][0]['applicationid'] + + return None + +def get_template_id(zapi, template_name): + ''' + get related templates + ''' + # Fetch templates by name + content = zapi.get_content('template', + 'get', + {'search': {'host': template_name}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + return content['result'][0]['templateid'] + + return None + +def get_host_id_by_name(zapi, host_name): + '''Get host id by name''' + content = zapi.get_content('host', + 'get', + {'filter': {'name': host_name}}) + + return content['result'][0]['hostid'] + +def get_status(status): + ''' Determine the status of the web scenario ''' + rval = 0 + if 'disabled' in status: + return 1 + + return rval + +def find_step(idx, step_list): + ''' find step by index ''' + for step in step_list: + if str(step['no']) == str(idx): + return step + + return None + +def steps_equal(zab_steps, user_steps): + '''compare steps returned from zabbix + and steps passed from user + ''' + + if len(user_steps) != len(zab_steps): + return False + + for idx in range(1, len(user_steps)+1): + + user = find_step(idx, user_steps) + zab = find_step(idx, zab_steps) + + for key, value in user.items(): + if str(value) != str(zab[key]): + return False + + return True + +# The branches are needed for CRUD and error handling +# pylint: disable=too-many-branches +def main(): + ''' + ansible zabbix module for zbx_item + ''' + + module = AnsibleModule( + argument_spec=dict( + zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), + zbx_user=dict(default=os.environ.get('ZABBIX_USER', None), type='str'), + zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'), + zbx_debug=dict(default=False, type='bool'), + name=dict(default=None, require=True, type='str'), + agent=dict(default=None, type='str'), + template_name=dict(default=None, type='str'), + host_name=dict(default=None, type='str'), + interval=dict(default=60, type='int'), + application=dict(default=None, type='str'), + authentication=dict(default=None, type='str'), + http_user=dict(default=None, type='str'), + http_password=dict(default=None, type='str'), + state=dict(default='present', type='str'), + status=dict(default='enabled', type='str'), + steps=dict(default='present', type='list'), + verify_host=dict(default=False, type='bool'), + retries=dict(default=1, type='int'), + headers=dict(default=None, type='dict'), + query_type=dict(default='filter', choices=['filter', 'search'], type='str'), + ), + #supports_check_mode=True + mutually_exclusive=[['template_name', 'host_name']], + ) + + zapi = ZabbixAPI(ZabbixConnection(module.params['zbx_server'], + module.params['zbx_user'], + module.params['zbx_password'], + module.params['zbx_debug'])) + + #Set the instance and the template for the rest of the calls + zbx_class_name = 'httptest' + state = module.params['state'] + hostid = None + + # If a template name was passed then accept the template + if module.params['template_name']: + hostid = get_template_id(zapi, module.params['template_name']) + else: + hostid = get_host_id_by_name(zapi, module.params['host_name']) + + # Fail if a template was not found matching the name + if not hostid: + module.exit_json(failed=True, + changed=False, + results='Error: Could find template or host with name [%s].' % + (module.params.get('template_name', module.params['host_name'])), + state="Unkown") + + content = zapi.get_content(zbx_class_name, + 'get', + {module.params['query_type']: {'name': module.params['name']}, + 'selectSteps': 'extend', + }) + + #******# + # GET + #******# + if state == 'list': + module.exit_json(changed=False, results=content['result'], state="list") + + #******# + # DELETE + #******# + if state == 'absent': + if not exists(content): + module.exit_json(changed=False, state="absent") + + content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0]['httptestid']]) + module.exit_json(changed=True, results=content['result'], state="absent") + + # Create and Update + if state == 'present': + + params = {'name': module.params['name'], + 'hostid': hostid, + 'agent': module.params['agent'], + 'retries': module.params['retries'], + 'steps': module.params['steps'], + 'applicationid': get_app_id(zapi, module.params['application']), + 'delay': module.params['interval'], + 'verify_host': get_verify_host(module.params['verify_host']), + 'status': get_status(module.params['status']), + 'headers': module.params['headers'], + 'http_user': module.params['http_user'], + 'http_password': module.params['http_password'], + } + + + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# + if not exists(content): + content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state='present') + + + ######## + # UPDATE + ######## + differences = {} + zab_results = content['result'][0] + for key, value in params.items(): + + if key == 'steps': + if not steps_equal(zab_results[key], value): + differences[key] = value + + elif zab_results[key] != value and zab_results[key] != str(value): + differences[key] = value + + # We have differences and need to update + if not differences: + module.exit_json(changed=False, results=zab_results, state="present") + + differences['httptestid'] = zab_results['httptestid'] + content = zapi.get_content(zbx_class_name, 'update', differences) + + if content.has_key('error'): + module.exit_json(failed=True, changed=False, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state="present") + + module.exit_json(failed=True, + changed=False, + results='Unknown state passed. %s' % state, + state="unknown") + +# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled +# import module snippets. This are required +from ansible.module_utils.basic import * + +main() diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py index 5dc3cff9b..996c98fae 100644 --- a/roles/lib_zabbix/library/zbx_item.py +++ b/roles/lib_zabbix/library/zbx_item.py @@ -41,6 +41,24 @@ def exists(content, key='result'): return True +def get_data_type(data_type): + ''' + Possible values: + 0 - decimal; + 1 - octal; + 2 - hexadecimal; + 3 - bool; + ''' + vtype = 0 + if 'octal' in data_type: + vtype = 1 + elif 'hexadecimal' in data_type: + vtype = 2 + elif 'bool' in data_type: + vtype = 3 + + return vtype + def get_value_type(value_type): ''' Possible values: @@ -158,6 +176,7 @@ def main(): template_name=dict(default=None, type='str'), zabbix_type=dict(default='trapper', type='str'), value_type=dict(default='int', type='str'), + data_type=dict(default='decimal', type='str'), interval=dict(default=60, type='int'), delta=dict(default=0, type='int'), multiplier=dict(default=None, type='str'), @@ -219,6 +238,7 @@ def main(): 'hostid': templateid[0], 'type': get_zabbix_type(module.params['zabbix_type']), 'value_type': get_value_type(module.params['value_type']), + 'data_type': get_data_type(module.params['data_type']), 'applications': get_app_ids(module.params['applications'], app_name_ids), 'formula': formula, 'multiplier': use_multiplier, diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py index 43498c015..aca9c8336 100644 --- a/roles/lib_zabbix/library/zbx_itemprototype.py +++ b/roles/lib_zabbix/library/zbx_itemprototype.py @@ -116,6 +116,24 @@ def get_zabbix_type(ztype): return _vtype +def get_data_type(data_type): + ''' + Possible values: + 0 - decimal; + 1 - octal; + 2 - hexadecimal; + 3 - bool; + ''' + vtype = 0 + if 'octal' in data_type: + vtype = 1 + elif 'hexadecimal' in data_type: + vtype = 2 + elif 'bool' in data_type: + vtype = 3 + + return vtype + def get_value_type(value_type): ''' Possible values: @@ -175,6 +193,7 @@ def main(): interfaceid=dict(default=None, type='int'), zabbix_type=dict(default='trapper', type='str'), value_type=dict(default='float', type='str'), + data_type=dict(default='decimal', type='str'), delay=dict(default=60, type='int'), lifetime=dict(default=30, type='int'), state=dict(default='present', type='str'), @@ -238,6 +257,7 @@ def main(): 'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']), 'type': get_zabbix_type(module.params['zabbix_type']), 'value_type': get_value_type(module.params['value_type']), + 'data_type': get_data_type(module.params['data_type']), 'applications': get_app_ids(zapi, module.params['applications'], template['templateid']), 'formula': formula, 'multiplier': use_multiplier, diff --git a/roles/lib_zabbix/library/zbx_usergroup.py b/roles/lib_zabbix/library/zbx_usergroup.py index 297d8ef91..3fd44d80c 100644 --- a/roles/lib_zabbix/library/zbx_usergroup.py +++ b/roles/lib_zabbix/library/zbx_usergroup.py @@ -27,6 +27,10 @@ zabbix ansible module for usergroups # but different for each zabbix class. # pylint: disable=duplicate-code +# Disabling too-many-branches as we need the error checking and the if-statements +# to determine the proper state +# pylint: disable=too-many-branches + # pylint: disable=import-error from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection @@ -92,26 +96,24 @@ def get_user_status(status): return 1 -#def get_userids(zapi, users): -# ''' Get userids from user aliases -# ''' -# if not users: -# return None -# -# userids = [] -# for alias in users: -# content = zapi.get_content('user', 'get', {'search': {'alias': alias}}) -# if content['result']: -# userids.append(content['result'][0]['userid']) -# -# return userids +def get_userids(zapi, users): + ''' Get userids from user aliases + ''' + if not users: + return None + + userids = [] + for alias in users: + content = zapi.get_content('user', 'get', {'search': {'alias': alias}}) + if content['result']: + userids.append(content['result'][0]['userid']) + + return userids def main(): ''' Ansible module for usergroup ''' - ##def usergroup(self, name, rights=None, users=None, state='present', params=None): - module = AnsibleModule( argument_spec=dict( zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), @@ -123,7 +125,7 @@ def main(): status=dict(default='enabled', type='str'), name=dict(default=None, type='str', required=True), rights=dict(default=None, type='list'), - #users=dict(default=None, type='list'), + users=dict(default=None, type='list'), state=dict(default='present', type='str'), ), #supports_check_mode=True @@ -144,9 +146,15 @@ def main(): {'search': {'name': uname}, 'selectUsers': 'userid', }) + #******# + # GET + #******# if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") + #******# + # DELETE + #******# if state == 'absent': if not exists(content): module.exit_json(changed=False, state="absent") @@ -157,6 +165,7 @@ def main(): content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) module.exit_json(changed=True, results=content['result'], state="absent") + # Create and Update if state == 'present': params = {'name': uname, @@ -164,26 +173,37 @@ def main(): 'users_status': get_user_status(module.params['status']), 'gui_access': get_gui_access(module.params['gui_access']), 'debug_mode': get_debug_mode(module.params['debug_mode']), - #'userids': get_userids(zapi, module.params['users']), + 'userids': get_userids(zapi, module.params['users']), } + # Remove any None valued params _ = [params.pop(key, None) for key in params.keys() if params[key] == None] + #******# + # CREATE + #******# if not exists(content): # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties + + + ######## + # UPDATE + ######## differences = {} zab_results = content['result'][0] for key, value in params.items(): if key == 'rights': differences['rights'] = value - #elif key == 'userids' and zab_results.has_key('users'): - #if zab_results['users'] != value: - #differences['userids'] = value + elif key == 'userids' and zab_results.has_key('users'): + if zab_results['users'] != value: + differences['userids'] = value elif zab_results[key] != value and zab_results[key] != str(value): differences[key] = value diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml index 44c4e6766..2992505bf 100644 --- a/roles/lib_zabbix/tasks/create_template.yml +++ b/roles/lib_zabbix/tasks/create_template.yml @@ -33,6 +33,7 @@ key: "{{ item.key }}" name: "{{ item.name | default(item.key, true) }}" value_type: "{{ item.value_type | default('int') }}" + data_type: "{{ item.data_type | default('decimal') }}" description: "{{ item.description | default('', True) }}" multiplier: "{{ item.multiplier | default('', True) }}" units: "{{ item.units | default('', True) }}" @@ -81,6 +82,7 @@ key: "{{ item.key }}" discoveryrule_key: "{{ item.discoveryrule_key }}" value_type: "{{ item.value_type }}" + data_type: "{{ item.data_type | default('decimal') }}" template_name: "{{ template.name }}" applications: "{{ item.applications }}" description: "{{ item.description | default('', True) }}" diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 091ba4e2b..6006bfa9d 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -651,7 +651,7 @@ def set_deployment_facts_if_unset(facts): if deployment_type in ['enterprise', 'online', 'openshift-enterprise']: registry_url = 'openshift3/ose-${component}:${version}' elif deployment_type == 'atomic-enterprise': - registry_url = 'aep3/aep-${component}:${version}' + registry_url = 'aep3_beta/aep-${component}:${version}' facts[role]['registry_url'] = registry_url if 'master' in facts: @@ -864,20 +864,38 @@ def apply_provider_facts(facts, provider_facts): return facts -def merge_facts(orig, new): +def merge_facts(orig, new, additive_facts_to_overwrite): """ Recursively merge facts dicts Args: orig (dict): existing facts new (dict): facts to update + + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] + Returns: dict: the merged facts """ + additive_facts = ['named_certificates'] facts = dict() for key, value in orig.iteritems(): if key in new: if isinstance(value, dict) and isinstance(new[key], dict): - facts[key] = merge_facts(value, new[key]) + relevant_additive_facts = [] + # Keep additive_facts_to_overwrite if key matches + for item in additive_facts_to_overwrite: + if '.' in item and item.startswith(key + '.'): + relevant_additive_facts.append(item) + facts[key] = merge_facts(value, new[key], relevant_additive_facts) + elif key in additive_facts and key not in [x.split('.')[-1] for x in additive_facts_to_overwrite]: + # Fact is additive so we'll combine orig and new. + if isinstance(value, list) and isinstance(new[key], list): + new_fact = [] + for item in copy.deepcopy(value) + copy.copy(new[key]): + if item not in new_fact: + new_fact.append(item) + facts[key] = new_fact else: facts[key] = copy.copy(new[key]) else: @@ -961,13 +979,15 @@ class OpenShiftFacts(object): role (str): role for setting local facts filename (str): local facts file to use local_facts (dict): local facts to set + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] Raises: OpenShiftFactsUnsupportedRoleError: """ known_roles = ['common', 'master', 'node', 'master_sdn', 'node_sdn', 'dns', 'etcd'] - def __init__(self, role, filename, local_facts): + def __init__(self, role, filename, local_facts, additive_facts_to_overwrite=False): self.changed = False self.filename = filename if role not in self.known_roles: @@ -976,25 +996,27 @@ class OpenShiftFacts(object): ) self.role = role self.system_facts = ansible_facts(module) - self.facts = self.generate_facts(local_facts) + self.facts = self.generate_facts(local_facts, additive_facts_to_overwrite) - def generate_facts(self, local_facts): + def generate_facts(self, local_facts, additive_facts_to_overwrite): """ Generate facts Args: local_facts (dict): local_facts for overriding generated defaults + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] Returns: dict: The generated facts """ - local_facts = self.init_local_facts(local_facts) + local_facts = self.init_local_facts(local_facts, additive_facts_to_overwrite) roles = local_facts.keys() defaults = self.get_defaults(roles) provider_facts = self.init_provider_facts() facts = apply_provider_facts(defaults, provider_facts) - facts = merge_facts(facts, local_facts) + facts = merge_facts(facts, local_facts, additive_facts_to_overwrite) facts['current_config'] = get_current_config(facts) facts = set_url_facts_if_unset(facts) facts = set_project_cfg_facts_if_unset(facts) @@ -1132,11 +1154,13 @@ class OpenShiftFacts(object): ) return provider_facts - def init_local_facts(self, facts=None): + def init_local_facts(self, facts=None, additive_facts_to_overwrite=False): """ Initialize the provider facts Args: facts (dict): local facts to set + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] Returns: dict: The result of merging the provided facts with existing @@ -1154,7 +1178,7 @@ class OpenShiftFacts(object): basestring): facts_to_set[arg] = module.from_json(facts_to_set[arg]) - new_local_facts = merge_facts(local_facts, facts_to_set) + new_local_facts = merge_facts(local_facts, facts_to_set, additive_facts_to_overwrite) for facts in new_local_facts.values(): keys_to_delete = [] for fact, value in facts.iteritems(): @@ -1184,6 +1208,7 @@ def main(): role=dict(default='common', required=False, choices=OpenShiftFacts.known_roles), local_facts=dict(default=None, type='dict', required=False), + additive_facts_to_overwrite=dict(default=[], type='list', required=False), ), supports_check_mode=True, add_file_common_args=True, @@ -1191,9 +1216,10 @@ def main(): role = module.params['role'] local_facts = module.params['local_facts'] + additive_facts_to_overwrite = module.params['additive_facts_to_overwrite'] fact_file = '/etc/ansible/facts.d/openshift.fact' - openshift_facts = OpenShiftFacts(role, fact_file, local_facts) + openshift_facts = OpenShiftFacts(role, fact_file, local_facts, additive_facts_to_overwrite) file_params = module.params.copy() file_params['path'] = fact_file diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index bb12a0a0f..2a37c06d9 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -27,9 +27,6 @@ corsAllowedOrigins: {% for custom_origin in openshift.master.custom_cors_origins | default("") %} - {{ custom_origin }} {% endfor %} -{% for name in (named_certificates | map(attribute='names')) | list | oo_flatten %} - - {{ name }} -{% endfor %} {% if 'disabled_features' in openshift.master %} disabledFeatures: {{ openshift.master.disabled_features | to_json }} {% endif %} @@ -144,9 +141,9 @@ servingInfo: keyFile: master.server.key maxRequestsInFlight: 500 requestTimeoutSeconds: 3600 -{% if named_certificates %} +{% if openshift.master.named_certificates %} namedCertificates: -{% for named_certificate in named_certificates %} +{% for named_certificate in openshift.master.named_certificates %} - certFile: {{ named_certificate['certfile'] }} keyFile: {{ named_certificate['keyfile'] }} names: diff --git a/roles/openshift_node/tasks/storage_plugins/glusterfs.yml b/roles/openshift_node/tasks/storage_plugins/glusterfs.yml index b812e81df..5cd4a6041 100644 --- a/roles/openshift_node/tasks/storage_plugins/glusterfs.yml +++ b/roles/openshift_node/tasks/storage_plugins/glusterfs.yml @@ -4,9 +4,14 @@ pkg: glusterfs-fuse state: installed -- name: Set seboolean to allow gluster storage plugin access from containers +- name: Set sebooleans to allow gluster storage plugin access from containers seboolean: - name: virt_use_fusefs + name: "{{ item }}" state: yes persistent: yes when: ansible_selinux and ansible_selinux.status == "enabled" + with_items: + - virt_use_fusefs + - virt_sandbox_use_fusefs + register: sebool_result + failed_when: "'state' not in sebool_result and 'msg' in sebool_result and 'SELinux boolean item does not exist' not in sebool_result.msg" diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index aaf5b9445..512adad4c 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,9 +13,16 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.api.ping + description: "Verify that the Openshift API is up" + type: int + applications: + - Openshift Master + - key: openshift.master.api.healthz description: "Checks the healthz check of the master's api: https://master_host/healthz" - type: bool + type: int + data_type: bool applications: - Openshift Master @@ -43,7 +50,13 @@ g_template_openshift_master: applications: - Openshift Master - - key: openshift.project.counter + - key: openshift.master.node.count + description: Shows the total number of nodes found in the Openshift Cluster + type: int + applications: + - Openshift Master + + - key: openshift.project.count description: Shows number of projects on a cluster type: int applications: @@ -121,6 +134,66 @@ g_template_openshift_master: applications: - Openshift Etcd + - key: openshift.master.metric.ping + description: "This check verifies that the https://master/metrics check is alive and communicating properly." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -132,6 +205,16 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high + - name: 'Openshift Master API PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: avg + - name: 'Openshift Master process not running on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -148,7 +231,7 @@ g_template_openshift_master: priority: info - name: 'There are no projects running on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.project.counter.last()}=0' + expression: '{Template Openshift Master:openshift.project.count.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info diff --git a/roles/oso_host_monitoring/README.md b/roles/oso_host_monitoring/README.md new file mode 100644 index 000000000..f1fa05adb --- /dev/null +++ b/roles/oso_host_monitoring/README.md @@ -0,0 +1,50 @@ +Role Name +========= + +Applies local host monitoring container(s). + +Requirements +------------ + +None. + +Role Variables +-------------- + +osohm_zagg_web_url: where to contact monitoring service +osohm_host_monitoring: name of host monitoring container +osohm_zagg_client: name of container with zabbix client +osohm_docker_registry_url: docker repository containing above containers +osohm_default_zagg_server_user: login info to zabbix server +osohm_default_zagg_password: password to zabbix server + +Dependencies +------------ + +None. + +Example Playbook +---------------- + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + + - hosts: servers + roles: + - oso_host_monitoring + vars: + osohm_zagg_web_url: "https://..." + osohm_host_monitoring: "oso-rhel7-host-monitoring" + osohm_zagg_client: "oso-rhel7-zagg-client" + osohm_docker_registry_url: "docker-registry.example.com/mon/" + osohm_default_zagg_server_user: "zagg-client" + osohm_default_zagg_password: "secret" + +License +------- + +ASL 2.0 + +Author Information +------------------ + +OpenShift operations, Red Hat, Inc diff --git a/roles/oso_host_monitoring/defaults/main.yml b/roles/oso_host_monitoring/defaults/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/oso_host_monitoring/defaults/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/oso_host_monitoring/handlers/main.yml b/roles/oso_host_monitoring/handlers/main.yml new file mode 100644 index 000000000..7863ad15b --- /dev/null +++ b/roles/oso_host_monitoring/handlers/main.yml @@ -0,0 +1,12 @@ +--- +- name: "Restart the {{ osohm_host_monitoring }} service" + service: + name: "{{ osohm_host_monitoring }}" + state: restarted + enabled: yes + +- name: "Restart the {{ osohm_zagg_client }} service" + service: + name: "{{ osohm_zagg_client }}" + state: restarted + enabled: yes diff --git a/roles/oso_host_monitoring/meta/main.yml b/roles/oso_host_monitoring/meta/main.yml new file mode 100644 index 000000000..cce30c2db --- /dev/null +++ b/roles/oso_host_monitoring/meta/main.yml @@ -0,0 +1,8 @@ +--- +galaxy_info: + author: OpenShift + description: apply monitoring container(s). + company: Red Hat, Inc + license: ASL 2.0 + min_ansible_version: 1.2 +dependencies: [] diff --git a/roles/oso_host_monitoring/tasks/main.yml b/roles/oso_host_monitoring/tasks/main.yml new file mode 100644 index 000000000..6ddfa3dcb --- /dev/null +++ b/roles/oso_host_monitoring/tasks/main.yml @@ -0,0 +1,65 @@ +--- +- fail: + msg: "This playbook requires {{item}} to be set." + when: "{{ item }} is not defined or {{ item }} == ''" + with_items: + - osohm_zagg_web_url + - osohm_host_monitoring + - osohm_zagg_client + - osohm_docker_registry_url + - osohm_default_zagg_server_user + - osohm_default_zagg_server_password + +- name: create /etc/docker/ops + file: + path: /etc/docker/ops + state: directory + mode: 0770 + group: root + owner: root + +- name: Copy dockercfg to /etc/docker/ops + template: + src: docker-registry.ops.cfg.j2 + dest: /etc/docker/ops/.dockercfg + owner: root + group: root + mode: 0600 + +- name: "Copy {{ osohm_host_monitoring }} systemd file" + template: + src: "{{ osohm_host_monitoring }}.service.j2" + dest: "/etc/systemd/system/{{ osohm_host_monitoring }}.service" + owner: root + group: root + mode: 0644 + notify: + - "Restart the {{ osohm_host_monitoring }} service" + register: systemd_host_monitoring + +- name: "Copy {{ osohm_zagg_client }} systemd file" + template: + src: "{{ osohm_zagg_client }}.service.j2" + dest: "/etc/systemd/system/{{ osohm_zagg_client }}.service" + owner: root + group: root + mode: 0644 + notify: + - "Restart the {{ osohm_zagg_client }} service" + register: zagg_systemd + +- name: reload systemd + command: /usr/bin/systemctl --system daemon-reload + when: systemd_host_monitoring | changed or zagg_systemd | changed + +- name: "Start the {{ osohm_host_monitoring }} service" + service: + name: "{{ osohm_host_monitoring }}" + state: started + enabled: yes + +- name: "Start the {{ osohm_zagg_client }} service" + service: + name: "{{ osohm_zagg_client }}" + state: started + enabled: yes diff --git a/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 b/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 new file mode 100644 index 000000000..9e49da469 --- /dev/null +++ b/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 @@ -0,0 +1 @@ +{"{{ osohm_docker_registry_ops_url }}":{"auth":"{{ osohm_docker_registry_ops_key }}","email":"{{ osohm_docker_registry_ops_email }}"}} diff --git a/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 b/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 new file mode 100644 index 000000000..d18ad90fe --- /dev/null +++ b/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 @@ -0,0 +1,43 @@ +# This is a systemd file to run this docker container under systemd. +# To make this work: +# * pull the image (probably from ops docker registry) +# * place this file in /etc/systemd/system without the .systemd extension +# * run the commands: +# systemctl daemon-reload +# systemctl enable pcp-docker +# systemctl start pcp-docker +# +# +[Unit] +Description=PCP Collector Contatainer +Requires=docker.service +After=docker.service + + +[Service] +Type=simple +TimeoutStartSec=5m +Environment=HOME=/etc/docker/ops +#Slice=container-small.slice + +# systemd syntax '=-' ignore errors from return codes. +ExecStartPre=-/usr/bin/docker kill "{{ osohm_host_monitoring }}" +ExecStartPre=-/usr/bin/docker rm "{{ osohm_host_monitoring }}" +ExecStartPre=-/usr/bin/docker pull "{{ osohm_docker_registry_url }}{{ osohm_host_monitoring }}" + + +ExecStart=/usr/bin/docker run --rm --name="{{ osohm_host_monitoring }}" \ + --privileged --net=host --pid=host --ipc=host \ + -v /sys:/sys:ro -v /etc/localtime:/etc/localtime:ro \ + -v /var/lib/docker:/var/lib/docker:ro -v /run:/run \ + -v /var/log:/var/log \ + {{ osohm_docker_registry_url }}{{ osohm_host_monitoring }} + +ExecReload=-/usr/bin/docker stop "{{ osohm_host_monitoring }}" +ExecReload=-/usr/bin/docker rm "{{ osohm_host_monitoring }}" +ExecStop=-/usr/bin/docker stop "{{ osohm_host_monitoring }}" +Restart=always +RestartSec=30 + +[Install] +WantedBy=default.target diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 new file mode 100644 index 000000000..978e40b88 --- /dev/null +++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 @@ -0,0 +1,62 @@ +# This is a systemd file to run this docker container under systemd. +# To make this work: +# * pull the image (probably from ops docker registry) +# * place this file in /etc/systemd/system without the .systemd extension +# * run the commands: +# systemctl daemon-reload +# systemctl enable zagg-client-docker +# systemctl start zagg-client-docker +# +# +[Unit] +Description=Zagg Client Contatainer +Requires=docker.service +After=docker.service + + +[Service] +Type=simple +TimeoutStartSec=5m +Environment=HOME=/etc/docker/ops +#Slice=container-small.slice + +# systemd syntax '=-' ignore errors from return codes. +ExecStartPre=-/usr/bin/docker kill "{{ osohm_zagg_client }}" +ExecStartPre=-/usr/bin/docker rm "{{ osohm_zagg_client }}" +ExecStartPre=-/usr/bin/docker pull "{{ osohm_docker_registry_url }}{{ osohm_zagg_client }}" + + +ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }} \ + --privileged \ + --pid=host \ + --net=host \ + -e ZAGG_URL={{ osohm_zagg_web_url }} \ + -e ZAGG_USER={{ osohm_default_zagg_server_user }} \ + -e ZAGG_PASSWORD={{ osohm_default_zagg_server_password }} \ + -e ZAGG_CLIENT_HOSTNAME={{ ec2_tag_Name }} \ + -e ZAGG_SSL_VERIFY={{ osohm_zagg_verify_ssl }} \ + -e OSO_CLUSTER_GROUP={{ cluster_group }} \ + -e OSO_CLUSTER_ID={{ oo_clusterid }} \ + -e OSO_HOST_TYPE={{ hostvars[inventory_hostname]['ec2_tag_host-type'] }} \ + -e OSO_SUB_HOST_TYPE={{ hostvars[inventory_hostname]['ec2_tag_sub-host-type'] }} \ + -v /etc/localtime:/etc/localtime \ + -v /run/pcp:/run/pcp \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock \ +{% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %} + -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig \ + -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \ + -v /etc/openshift/master/master.etcd-client.key:/etc/openshift/master/master.etcd-client.key \ + -v /etc/openshift/master/master-config.yaml:/etc/openshift/master/master-config.yaml \ +{% endif %} + {{ osohm_docker_registry_url }}{{ osohm_zagg_client }} + + +ExecReload=-/usr/bin/docker stop "{{ osohm_zagg_client }}" +ExecReload=-/usr/bin/docker rm "{{ osohm_zagg_client }}" +ExecStop=-/usr/bin/docker stop "{{ osohm_zagg_client }}" +Restart=always +RestartSec=30 + +[Install] +WantedBy=default.target diff --git a/roles/oso_host_monitoring/vars/main.yml b/roles/oso_host_monitoring/vars/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/oso_host_monitoring/vars/main.yml @@ -0,0 +1 @@ +--- |