From 1d5d13308bd79051b0bf3311240ef0e6cb286392 Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Tue, 16 May 2017 12:24:46 -0400 Subject: health checks: configure failure output in playbooks Customized the error summary to depend on the intent of the playbook run. Ensured output makes sense when failures are unrelated to running checks. --- playbooks/byo/config.yml | 12 --- playbooks/byo/openshift-cluster/config.yml | 13 +++ playbooks/common/openshift-checks/health.yml | 11 ++- playbooks/common/openshift-checks/pre-install.yml | 11 ++- .../action_plugins/openshift_health_check.py | 6 +- .../callback_plugins/zz_failure_summary.py | 107 ++++++++++++--------- 6 files changed, 93 insertions(+), 67 deletions(-) diff --git a/playbooks/byo/config.yml b/playbooks/byo/config.yml index 050b271ca..7d03914a2 100644 --- a/playbooks/byo/config.yml +++ b/playbooks/byo/config.yml @@ -1,14 +1,2 @@ --- -- name: Verify Requirements - # REVIEW: what's the proper group to use: OSEv3, g_all_hosts or something else? - hosts: OSEv3 - roles: - - openshift_health_checker - post_tasks: - - action: openshift_health_check - args: - checks: - - disk_availability - - memory_availability - - include: openshift-cluster/config.yml diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index acf5469bf..fd4a9eb26 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -3,6 +3,19 @@ tags: - always +- name: Verify Requirements + hosts: OSEv3 + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "install" + post_tasks: + - action: openshift_health_check + args: + checks: + - disk_availability + - memory_availability + - include: ../../common/openshift-cluster/std_include.yml tags: - always diff --git a/playbooks/common/openshift-checks/health.yml b/playbooks/common/openshift-checks/health.yml index fc0f523d5..1bee460e8 100644 --- a/playbooks/common/openshift-checks/health.yml +++ b/playbooks/common/openshift-checks/health.yml @@ -2,9 +2,10 @@ - name: Run OpenShift health checks hosts: OSEv3 roles: - - openshift_health_checker + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "health" post_tasks: - - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 - args: - checks: - - '@health' + - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 + args: + checks: ['@health'] diff --git a/playbooks/common/openshift-checks/pre-install.yml b/playbooks/common/openshift-checks/pre-install.yml index c8ffc3d91..e01c6f38d 100644 --- a/playbooks/common/openshift-checks/pre-install.yml +++ b/playbooks/common/openshift-checks/pre-install.yml @@ -2,9 +2,10 @@ - hosts: OSEv3 name: run OpenShift pre-install checks roles: - - openshift_health_checker + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "pre-install" post_tasks: - - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 - args: - checks: - - '@preflight' + - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 + args: + checks: ['@preflight'] diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index e8a4c6474..a1df9cea3 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -25,9 +25,11 @@ class ActionModule(ActionBase): def run(self, tmp=None, task_vars=None): result = super(ActionModule, self).run(tmp, task_vars) + task_vars = task_vars or {} - if task_vars is None: - task_vars = {} + # vars are not supportably available in the callback plugin, + # so record any it will need in the result. + result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context') if "openshift" not in task_vars: result["failed"] = True diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 1124c4125..64c29a8d9 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -2,6 +2,12 @@ Ansible callback plugin. ''' +# Reason: In several locations below we disable pylint protected-access +# for Ansible objects that do not give us any public way +# to access the full details we need to report check failures. +# Status: disabled permanently or until Ansible object has a public API. +# This does leave the code more likely to be broken by future Ansible changes. + from pprint import pformat from ansible.plugins.callback import CallbackBase @@ -20,38 +26,37 @@ class CallbackModule(CallbackBase): CALLBACK_TYPE = 'aggregate' CALLBACK_NAME = 'failure_summary' CALLBACK_NEEDS_WHITELIST = False + _playbook_file = None def __init__(self): super(CallbackModule, self).__init__() self.__failures = [] + def v2_playbook_on_start(self, playbook): + super(CallbackModule, self).v2_playbook_on_start(playbook) + # re: playbook attrs see top comment # pylint: disable=protected-access + self._playbook_file = playbook._file_name + def v2_runner_on_failed(self, result, ignore_errors=False): super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors) self.__failures.append(dict(result=result, ignore_errors=ignore_errors)) def v2_playbook_on_stats(self, stats): super(CallbackModule, self).v2_playbook_on_stats(stats) - # TODO: update condition to consider a host var or env var to - # enable/disable the summary, so that we can control the output from a - # play. if self.__failures: - self._print_failure_summary() + self._print_failure_details(self.__failures) - def _print_failure_summary(self): - '''Print a summary of failed tasks (including ignored failures).''' + def _print_failure_details(self, failures): + '''Print a summary of failed tasks or checks.''' self._display.display(u'\nFailure summary:\n') - # TODO: group failures by host or by task. If grouped by host, it is - # easy to see all problems of a given host. If grouped by task, it is - # easy to see what hosts needs the same fix. - - width = len(str(len(self.__failures))) + width = len(str(len(failures))) initial_indent_format = u' {{:>{width}}}. '.format(width=width) initial_indent_len = len(initial_indent_format.format(0)) subsequent_indent = u' ' * initial_indent_len subsequent_extra_indent = u' ' * (initial_indent_len + 10) - for i, failure in enumerate(self.__failures, 1): + for i, failure in enumerate(failures, 1): entries = _format_failure(failure) self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), entries[0])) for entry in entries[1:]: @@ -59,33 +64,52 @@ class CallbackModule(CallbackBase): indented = u'{}{}'.format(subsequent_indent, entry) self._display.display(indented) - if self.__failures: - failed_checks = [ + failed_checks = set() + playbook_context = None + # re: result attrs see top comment # pylint: disable=protected-access + for failure in failures: + # get context from check task result since callback plugins cannot access task vars + playbook_context = playbook_context or failure['result']._result.get('playbook_context') + failed_checks.update( name - for name, result in failure['result']._result.get('checks', []).items() - for failure in self.__failures - if result.get('failed', False) - ] - # FIXME: get name of currently running playbook, if possible. - NAME_OF_PLAYBOOK = 'playbooks/byo/config.yml' - msg = ( - "\nThe execution of the playbook '{}' includes checks designed " - 'to ensure it can complete successfully. One or more of these ' - 'checks failed. You may choose to disable checks by setting an ' - 'Ansible variable:\n\n' - ' openshift_disable_check={}\n\n' - 'Set the variable to a comma-separated list of check names. ' - 'Check names are shown in the failure summary above.\n' - 'The variable can be set in the inventory or passed in the ' - 'command line using the -e flag to ansible-playbook.' - ).format(NAME_OF_PLAYBOOK, ','.join(sorted(set(failed_checks)))) - self._display.display(msg) - - -# Reason: disable pylint protected-access because we need to access _* -# attributes of a task result to implement this method. -# Status: permanently disabled unless Ansible's API changes. -# pylint: disable=protected-access + for name, result in failure['result']._result.get('checks', {}).items() + if result.get('failed') + ) + if failed_checks: + self._print_check_failure_summary(failed_checks, playbook_context) + + def _print_check_failure_summary(self, failed_checks, context): + checks = ','.join(sorted(failed_checks)) + # NOTE: context is not set if all failures occurred prior to checks task + summary = ( + '\n' + 'The execution of "{playbook}"\n' + 'includes checks designed to fail early if the requirements\n' + 'of the playbook are not met. One or more of these checks\n' + 'failed. To disregard these results, you may choose to\n' + 'disable failing checks by setting an Ansible variable:\n\n' + ' openshift_disable_check={checks}\n\n' + 'Failing check names are shown in the failure details above.\n' + 'Some checks may be configurable by variables if your requirements\n' + 'are different from the defaults; consult check documentation.\n' + 'Variables can be set in the inventory or passed on the\n' + 'command line using the -e flag to ansible-playbook.\n' + ).format(playbook=self._playbook_file, checks=checks) + if context in ['pre-install', 'health']: + summary = ( + '\n' + 'You may choose to configure or disable failing checks by\n' + 'setting Ansible variables. To disable those above:\n\n' + ' openshift_disable_check={checks}\n\n' + 'Consult check documentation for configurable variables.\n' + 'Variables can be set in the inventory or passed on the\n' + 'command line using the -e flag to ansible-playbook.\n' + ).format(checks=checks) + # other expected contexts: install, upgrade + self._display.display(summary) + + +# re: result attrs see top comment # pylint: disable=protected-access def _format_failure(failure): '''Return a list of pretty-formatted text entries describing a failure, including relevant information about it. Expect that the list of text entries will be joined @@ -122,11 +146,8 @@ def _format_failed_checks(checks): return stringc(pformat(checks), C.COLOR_ERROR) -# Reason: disable pylint protected-access because we need to access _* -# attributes of obj to implement this function. -# This is inspired by ansible.playbook.base.Base.dump_me. -# Status: permanently disabled unless Ansible's API changes. -# pylint: disable=protected-access +# This is inspired by ansible.playbook.base.Base.dump_me. +# re: play/task/block attrs see top comment # pylint: disable=protected-access def _get_play(obj): '''Given a task or block, recursively tries to find its parent play.''' if hasattr(obj, '_play'): -- cgit v1.2.3