diff options
-rw-r--r-- | playbooks/byo/openshift-preflight/README.md | 43 | ||||
-rw-r--r-- | playbooks/byo/openshift-preflight/check.yml | 31 | ||||
-rwxr-xr-x | roles/openshift_facts/library/openshift_facts.py | 8 | ||||
-rw-r--r-- | roles/openshift_preflight/README.md | 52 | ||||
-rwxr-xr-x | roles/openshift_preflight/base/library/aos_version.py | 100 | ||||
-rwxr-xr-x | roles/openshift_preflight/base/library/check_yum_update.py | 116 | ||||
-rw-r--r-- | roles/openshift_preflight/common/meta/main.yml | 3 | ||||
-rw-r--r-- | roles/openshift_preflight/common/tasks/main.yml | 21 | ||||
-rw-r--r-- | roles/openshift_preflight/init/meta/main.yml | 3 | ||||
-rw-r--r-- | roles/openshift_preflight/init/tasks/main.yml | 4 | ||||
-rw-r--r-- | roles/openshift_preflight/masters/meta/main.yml | 3 | ||||
-rw-r--r-- | roles/openshift_preflight/masters/tasks/main.yml | 31 | ||||
-rw-r--r-- | roles/openshift_preflight/nodes/meta/main.yml | 3 | ||||
-rw-r--r-- | roles/openshift_preflight/nodes/tasks/main.yml | 41 | ||||
-rw-r--r-- | roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py | 96 | ||||
-rw-r--r-- | roles/openshift_preflight/verify_status/tasks/main.yml | 8 |
16 files changed, 560 insertions, 3 deletions
diff --git a/playbooks/byo/openshift-preflight/README.md b/playbooks/byo/openshift-preflight/README.md new file mode 100644 index 000000000..b50292eac --- /dev/null +++ b/playbooks/byo/openshift-preflight/README.md @@ -0,0 +1,43 @@ +# OpenShift preflight checks + +Here we provide an Ansible playbook for detecting potential roadblocks prior to +an install or upgrade. + +Ansible's default operation mode is to fail fast, on the first error. However, +when performing checks, it is useful to gather as much information about +problems as possible in a single run. + +The `check.yml` playbook runs a battery of checks against the inventory hosts +and tells Ansible to ignore intermediate errors, thus giving a more complete +diagnostic of the state of each host. Still, if any check failed, the playbook +run will be marked as having failed. + +To facilitate understanding the problems that were encountered, we provide a +custom callback plugin to summarize execution errors at the end of a playbook +run. + +--- + +*Note that currently the `check.yml` playbook is only useful for RPM-based +installations. Containerized installs are excluded from checks for now, but +might be included in the future if there is demand for that.* + +--- + +## Running + +With an installation of Ansible 2.2 or greater, run the playbook directly +against your inventory file. Here is the step-by-step: + +1. If you haven't done it yet, clone this repository: + + ```console + $ git clone https://github.com/openshift/openshift-ansible + $ cd openshift-ansible + ``` + +2. Run the playbook: + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-preflight/check.yml + ``` diff --git a/playbooks/byo/openshift-preflight/check.yml b/playbooks/byo/openshift-preflight/check.yml new file mode 100644 index 000000000..32673d01d --- /dev/null +++ b/playbooks/byo/openshift-preflight/check.yml @@ -0,0 +1,31 @@ +--- +- hosts: OSEv3 + roles: + - openshift_preflight/init + +- hosts: OSEv3 + name: checks that apply to all hosts + gather_facts: no + ignore_errors: yes + roles: + - openshift_preflight/common + +- hosts: masters + name: checks that apply to masters + gather_facts: no + ignore_errors: yes + roles: + - openshift_preflight/masters + +- hosts: nodes + name: checks that apply to nodes + gather_facts: no + ignore_errors: yes + roles: + - openshift_preflight/nodes + +- hosts: OSEv3 + name: verify check results + gather_facts: no + roles: + - openshift_preflight/verify_status diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 10e30f1c4..616b41c7b 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -1246,10 +1246,10 @@ def build_api_server_args(facts): def is_service_running(service): """ Queries systemd through dbus to see if the service is running """ service_running = False - bus = SystemBus() - systemd = bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1') - manager = Interface(systemd, dbus_interface='org.freedesktop.systemd1.Manager') try: + bus = SystemBus() + systemd = bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1') + manager = Interface(systemd, dbus_interface='org.freedesktop.systemd1.Manager') service_unit = service if service.endswith('.service') else manager.GetUnit('{0}.service'.format(service)) service_proxy = bus.get_object('org.freedesktop.systemd1', str(service_unit)) service_properties = Interface(service_proxy, dbus_interface='org.freedesktop.DBus.Properties') @@ -1258,6 +1258,8 @@ def is_service_running(service): if service_load_state == 'loaded' and service_active_state == 'active': service_running = True except DBusException: + # TODO: do not swallow exception, as it may be hiding useful debugging + # information. pass return service_running diff --git a/roles/openshift_preflight/README.md b/roles/openshift_preflight/README.md new file mode 100644 index 000000000..b6d3542d3 --- /dev/null +++ b/roles/openshift_preflight/README.md @@ -0,0 +1,52 @@ +OpenShift Preflight Checks +========================== + +This role detects common problems prior to installing OpenShift. + +Requirements +------------ + +* Ansible 2.2+ + +Role Variables +-------------- + +None + +Dependencies +------------ + +None + +Example Playbook +---------------- + +```yaml +--- +- hosts: OSEv3 + roles: + - openshift_preflight/init + +- hosts: OSEv3 + name: checks that apply to all hosts + gather_facts: no + ignore_errors: yes + roles: + - openshift_preflight/common + +- hosts: OSEv3 + name: verify check results + gather_facts: no + roles: + - openshift_preflight/verify_status +``` + +License +------- + +Apache License Version 2.0 + +Author Information +------------------ + +Customer Success team (dev@lists.openshift.redhat.com) diff --git a/roles/openshift_preflight/base/library/aos_version.py b/roles/openshift_preflight/base/library/aos_version.py new file mode 100755 index 000000000..f7fcb6da5 --- /dev/null +++ b/roles/openshift_preflight/base/library/aos_version.py @@ -0,0 +1,100 @@ +#!/usr/bin/python +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +An ansible module for determining if more than one minor version +of any atomic-openshift package is available, which would indicate +that multiple repos are enabled for different versions of the same +thing which may cause problems. + +Also, determine if the version requested is available down to the +precision requested. +''' + +# import os +# import sys +import yum # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule + + +def main(): # pylint: disable=missing-docstring + module = AnsibleModule( + argument_spec=dict( + version=dict(required=True) + ), + supports_check_mode=True + ) + + # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet + # for when it is actually necessary. Leaving it commented out for now, + # though this comment and the commented out code related to _unmute should + # be deleted later if not proven necessary. + + # sys.stdout = os.devnull # mute yum so it doesn't break our output + # sys.stderr = os.devnull # mute yum so it doesn't break our output + + # def _unmute(): # pylint: disable=missing-docstring + # sys.stdout = sys.__stdout__ + + def bail(error): # pylint: disable=missing-docstring + # _unmute() + module.fail_json(msg=error) + + yb = yum.YumBase() # pylint: disable=invalid-name + + # search for package versions available for aos pkgs + expected_pkgs = [ + 'atomic-openshift', + 'atomic-openshift-master', + 'atomic-openshift-node', + ] + try: + pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs) + except yum.Errors.PackageSackError as e: # pylint: disable=invalid-name + # you only hit this if *none* of the packages are available + bail('Unable to find any atomic-openshift packages. \nCheck your subscription and repo settings. \n%s' % e) + + # determine what level of precision we're expecting for the version + expected_version = module.params['version'] + if expected_version.startswith('v'): # v3.3 => 3.3 + expected_version = expected_version[1:] + num_dots = expected_version.count('.') + + pkgs_by_name_version = {} + pkgs_precise_version_found = {} + for pkg in pkgs: + # get expected version precision + match_version = '.'.join(pkg.version.split('.')[:num_dots + 1]) + if match_version == expected_version: + pkgs_precise_version_found[pkg.name] = True + # get x.y version precision + minor_version = '.'.join(pkg.version.split('.')[:2]) + if pkg.name not in pkgs_by_name_version: + pkgs_by_name_version[pkg.name] = {} + pkgs_by_name_version[pkg.name][minor_version] = True + + # see if any packages couldn't be found at requested version + # see if any packages are available in more than one minor version + not_found = [] + multi_found = [] + for name in expected_pkgs: + if name not in pkgs_precise_version_found: + not_found.append(name) + if name in pkgs_by_name_version and len(pkgs_by_name_version[name]) > 1: + multi_found.append(name) + if not_found: + msg = 'Not all of the required packages are available at requested version %s:\n' % expected_version + for name in not_found: + msg += ' %s\n' % name + bail(msg + 'Please check your subscriptions and enabled repositories.') + if multi_found: + msg = 'Multiple minor versions of these packages are available\n' + for name in multi_found: + msg += ' %s\n' % name + bail(msg + "There should only be one OpenShift version's repository enabled at a time.") + + # _unmute() + module.exit_json(changed=False) + + +if __name__ == '__main__': + main() diff --git a/roles/openshift_preflight/base/library/check_yum_update.py b/roles/openshift_preflight/base/library/check_yum_update.py new file mode 100755 index 000000000..296ebd44f --- /dev/null +++ b/roles/openshift_preflight/base/library/check_yum_update.py @@ -0,0 +1,116 @@ +#!/usr/bin/python +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +Ansible module to test whether a yum update or install will succeed, +without actually performing it or running yum. +parameters: + packages: (optional) A list of package names to install or update. + If omitted, all installed RPMs are considered for updates. +''' + +# import os +import sys +import yum # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule + + +def main(): # pylint: disable=missing-docstring,too-many-branches + module = AnsibleModule( + argument_spec=dict( + packages=dict(type='list', default=[]) + ), + supports_check_mode=True + ) + + # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet + # for when it is actually necessary. Leaving it commented out for now, + # though this comment and the commented out code related to _unmute should + # be deleted later if not proven necessary. + + # sys.stdout = os.devnull # mute yum so it doesn't break our output + + # def _unmute(): # pylint: disable=missing-docstring + # sys.stdout = sys.__stdout__ + + def bail(error): # pylint: disable=missing-docstring + # _unmute() + module.fail_json(msg=error) + + yb = yum.YumBase() # pylint: disable=invalid-name + # determine if the existing yum configuration is valid + try: + yb.repos.populateSack(mdtype='metadata', cacheonly=1) + # for error of type: + # 1. can't reach the repo URL(s) + except yum.Errors.NoMoreMirrorsRepoError as e: # pylint: disable=invalid-name + bail('Error getting data from at least one yum repository: %s' % e) + # 2. invalid repo definition + except yum.Errors.RepoError as e: # pylint: disable=invalid-name + bail('Error with yum repository configuration: %s' % e) + # 3. other/unknown + # * just report the problem verbatim + except: # pylint: disable=bare-except; # noqa + bail('Unexpected error with yum repository: %s' % sys.exc_info()[1]) + + packages = module.params['packages'] + no_such_pkg = [] + for pkg in packages: + try: + yb.install(name=pkg) + except yum.Errors.InstallError as e: # pylint: disable=invalid-name + no_such_pkg.append(pkg) + except: # pylint: disable=bare-except; # noqa + bail('Unexpected error with yum install/update: %s' % + sys.exc_info()[1]) + if not packages: + # no packages requested means test a yum update of everything + yb.update() + elif no_such_pkg: + # wanted specific packages to install but some aren't available + user_msg = 'Cannot install all of the necessary packages. Unavailable:\n' + for pkg in no_such_pkg: + user_msg += ' %s\n' % pkg + user_msg += 'You may need to enable one or more yum repositories to make this content available.' + bail(user_msg) + + try: + txn_result, txn_msgs = yb.buildTransaction() + except: # pylint: disable=bare-except; # noqa + bail('Unexpected error during dependency resolution for yum update: \n %s' % + sys.exc_info()[1]) + + # find out if there are any errors with the update/install + if txn_result == 0: # 'normal exit' meaning there's nothing to install/update + pass + elif txn_result == 1: # error with transaction + user_msg = 'Could not perform a yum update.\n' + if len(txn_msgs) > 0: + user_msg += 'Errors from dependency resolution:\n' + for msg in txn_msgs: + user_msg += ' %s\n' % msg + user_msg += 'You should resolve these issues before proceeding with an install.\n' + user_msg += 'You may need to remove or downgrade packages or enable/disable yum repositories.' + bail(user_msg) + # TODO: it would be nice depending on the problem: + # 1. dependency for update not found + # * construct the dependency tree + # * find the installed package(s) that required the missing dep + # * determine if any of these packages matter to openshift + # * build helpful error output + # 2. conflicts among packages in available content + # * analyze dependency tree and build helpful error output + # 3. other/unknown + # * report the problem verbatim + # * add to this list as we come across problems we can clearly diagnose + elif txn_result == 2: # everything resolved fine + pass + else: + bail('Unknown error(s) from dependency resolution. Exit Code: %d:\n%s' % + (txn_result, txn_msgs)) + + # _unmute() + module.exit_json(changed=False) + + +if __name__ == '__main__': + main() diff --git a/roles/openshift_preflight/common/meta/main.yml b/roles/openshift_preflight/common/meta/main.yml new file mode 100644 index 000000000..6f23cbf3b --- /dev/null +++ b/roles/openshift_preflight/common/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: openshift_preflight/base diff --git a/roles/openshift_preflight/common/tasks/main.yml b/roles/openshift_preflight/common/tasks/main.yml new file mode 100644 index 000000000..f1a4a160e --- /dev/null +++ b/roles/openshift_preflight/common/tasks/main.yml @@ -0,0 +1,21 @@ +--- +# check content available on all hosts +- when: not openshift.common.is_containerized | bool + block: + + - name: determine if yum update will work + action: check_yum_update + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if yum update will work'})] }}" + + - name: determine if expected version matches what is available + aos_version: + version: "{{ openshift_release }}" + when: + - deployment_type == "openshift-enterprise" + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if expected version matches what is available'})] }}" diff --git a/roles/openshift_preflight/init/meta/main.yml b/roles/openshift_preflight/init/meta/main.yml new file mode 100644 index 000000000..0bbeadd34 --- /dev/null +++ b/roles/openshift_preflight/init/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: openshift_facts diff --git a/roles/openshift_preflight/init/tasks/main.yml b/roles/openshift_preflight/init/tasks/main.yml new file mode 100644 index 000000000..bf2d82196 --- /dev/null +++ b/roles/openshift_preflight/init/tasks/main.yml @@ -0,0 +1,4 @@ +--- +- name: set common variables + set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results | default([]) }}" diff --git a/roles/openshift_preflight/masters/meta/main.yml b/roles/openshift_preflight/masters/meta/main.yml new file mode 100644 index 000000000..6f23cbf3b --- /dev/null +++ b/roles/openshift_preflight/masters/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: openshift_preflight/base diff --git a/roles/openshift_preflight/masters/tasks/main.yml b/roles/openshift_preflight/masters/tasks/main.yml new file mode 100644 index 000000000..35fb1e3ca --- /dev/null +++ b/roles/openshift_preflight/masters/tasks/main.yml @@ -0,0 +1,31 @@ +--- +# determine if yum install of master pkgs will work +- when: not openshift.common.is_containerized | bool + block: + + - name: main master packages availability + check_yum_update: + packages: + - "{{ openshift.common.service_type }}" + - "{{ openshift.common.service_type }}-clients" + - "{{ openshift.common.service_type }}-master" + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main master packages availability'})] }}" + + - name: other master packages availability + check_yum_update: + packages: + - etcd + - bash-completion + - cockpit-bridge + - cockpit-docker + - cockpit-kubernetes + - cockpit-shell + - cockpit-ws + - httpd-tools + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other master packages availability'})] }}" diff --git a/roles/openshift_preflight/nodes/meta/main.yml b/roles/openshift_preflight/nodes/meta/main.yml new file mode 100644 index 000000000..6f23cbf3b --- /dev/null +++ b/roles/openshift_preflight/nodes/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: openshift_preflight/base diff --git a/roles/openshift_preflight/nodes/tasks/main.yml b/roles/openshift_preflight/nodes/tasks/main.yml new file mode 100644 index 000000000..a10e69024 --- /dev/null +++ b/roles/openshift_preflight/nodes/tasks/main.yml @@ -0,0 +1,41 @@ +--- +# determine if yum install of node pkgs will work +- when: not openshift.common.is_containerized | bool + block: + + - name: main node packages availability + check_yum_update: + packages: + - "{{ openshift.common.service_type }}" + - "{{ openshift.common.service_type }}-node" + - "{{ openshift.common.service_type }}-sdn-ovs" + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main node packages availability'})] }}" + + - name: other node packages availability + check_yum_update: + packages: + - docker + - PyYAML + - firewalld + - iptables + - iptables-services + - nfs-utils + - ntp + - yum-utils + - dnsmasq + - libselinux-python + - ceph-common + - glusterfs-fuse + - iscsi-initiator-utils + - pyparted + - python-httplib2 + - openssl + - flannel + - bind + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other node packages availability'})] }}" diff --git a/roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py b/roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py new file mode 100644 index 000000000..180ed8d8f --- /dev/null +++ b/roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py @@ -0,0 +1,96 @@ +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +Ansible callback plugin. +''' + +from ansible.plugins.callback import CallbackBase +from ansible import constants as C +from ansible.utils.color import stringc + + +class CallbackModule(CallbackBase): + ''' + This callback plugin stores task results and summarizes failures. + The file name is prefixed with `zz_` to make this plugin be loaded last by + Ansible, thus making its output the last thing that users see. + ''' + + CALLBACK_VERSION = 2.0 + CALLBACK_TYPE = 'aggregate' + CALLBACK_NAME = 'failure_summary' + CALLBACK_NEEDS_WHITELIST = False + + def __init__(self): + super(CallbackModule, self).__init__() + self.__failures = [] + + def v2_runner_on_failed(self, result, ignore_errors=False): + super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors) + self.__failures.append(dict(result=result, ignore_errors=ignore_errors)) + + def v2_playbook_on_stats(self, stats): + super(CallbackModule, self).v2_playbook_on_stats(stats) + # TODO: update condition to consider a host var or env var to + # enable/disable the summary, so that we can control the output from a + # play. + if self.__failures: + self._print_failure_summary() + + def _print_failure_summary(self): + '''Print a summary of failed tasks (including ignored failures).''' + self._display.display(u'\nFailure summary:\n') + + # TODO: group failures by host or by task. If grouped by host, it is + # easy to see all problems of a given host. If grouped by task, it is + # easy to see what hosts needs the same fix. + + width = len(str(len(self.__failures))) + initial_indent_format = u' {{:>{width}}}. '.format(width=width) + initial_indent_len = len(initial_indent_format.format(0)) + subsequent_indent = u' ' * initial_indent_len + subsequent_extra_indent = u' ' * (initial_indent_len + 10) + + for i, failure in enumerate(self.__failures, 1): + lines = _format_failure(failure) + self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), lines[0])) + for line in lines[1:]: + line = line.replace(u'\n', u'\n' + subsequent_extra_indent) + indented = u'{}{}'.format(subsequent_indent, line) + self._display.display(indented) + + +# Reason: disable pylint protected-access because we need to access _* +# attributes of a task result to implement this method. +# Status: permanently disabled unless Ansible's API changes. +# pylint: disable=protected-access +def _format_failure(failure): + '''Return a list of pretty-formatted lines describing a failure, including + relevant information about it. Line separators are not included.''' + result = failure['result'] + host = result._host.get_name() + play = _get_play(result._task) + if play: + play = play.get_name() + task = result._task.get_name() + msg = result._result.get('msg', u'???') + rows = ( + (u'Host', host), + (u'Play', play), + (u'Task', task), + (u'Message', stringc(msg, C.COLOR_ERROR)), + ) + row_format = '{:10}{}' + return [row_format.format(header + u':', body) for header, body in rows] + + +# Reason: disable pylint protected-access because we need to access _* +# attributes of obj to implement this function. +# This is inspired by ansible.playbook.base.Base.dump_me. +# Status: permanently disabled unless Ansible's API changes. +# pylint: disable=protected-access +def _get_play(obj): + '''Given a task or block, recursively tries to find its parent play.''' + if hasattr(obj, '_play'): + return obj._play + if getattr(obj, '_parent'): + return _get_play(obj._parent) diff --git a/roles/openshift_preflight/verify_status/tasks/main.yml b/roles/openshift_preflight/verify_status/tasks/main.yml new file mode 100644 index 000000000..36ccf648a --- /dev/null +++ b/roles/openshift_preflight/verify_status/tasks/main.yml @@ -0,0 +1,8 @@ +--- +- name: find check failures + set_fact: + oo_preflight_check_failures: "{{ oo_preflight_check_results | select('failed', 'equalto', True) | list }}" + +- name: ensure all checks succeed + action: fail + when: oo_preflight_check_failures |