17 files changed, 637 insertions, 332 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index 05e53333d..d02a43655 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -1,76 +1,74 @@
 """
 Ansible action plugin to execute health checks in OpenShift clusters.
 """
-# pylint: disable=wrong-import-position,missing-docstring,invalid-name
 import sys
 import os
+import traceback
 from collections import defaultdict
 
+from ansible.plugins.action import ActionBase
+from ansible.module_utils.six import string_types
+
 try:
     from __main__ import display
 except ImportError:
+    # pylint: disable=ungrouped-imports; this is the standard way how to import
+    # the default display object in Ansible action plugins.
     from ansible.utils.display import Display
     display = Display()
 
-from ansible.plugins.action import ActionBase
-from ansible.module_utils.six import string_types
-
 # Augment sys.path so that we can import checks from a directory relative to
 # this callback plugin.
 sys.path.insert(1, os.path.dirname(os.path.dirname(__file__)))
 
+# pylint: disable=wrong-import-position; the import statement must come after
+# the manipulation of sys.path.
 from openshift_checks import OpenShiftCheck, OpenShiftCheckException, load_checks  # noqa: E402
 
 
 class ActionModule(ActionBase):
+    """Action plugin to execute health checks."""
 
     def run(self, tmp=None, task_vars=None):
         result = super(ActionModule, self).run(tmp, task_vars)
         task_vars = task_vars or {}
 
-        # vars are not supportably available in the callback plugin,
-        # so record any it will need in the result.
+        # callback plugins cannot read Ansible vars, but we would like
+        # zz_failure_summary to have access to certain values. We do so by
+        # storing the information we need in the result.
         result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context')
 
-        if "openshift" not in task_vars:
-            result["failed"] = True
-            result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?"
-            return result
-
         try:
             known_checks = self.load_known_checks(tmp, task_vars)
             args = self._task.args
             requested_checks = normalize(args.get('checks', []))
+
+            if not requested_checks:
+                result['failed'] = True
+                result['msg'] = list_known_checks(known_checks)
+                return result
+
             resolved_checks = resolve_checks(requested_checks, known_checks.values())
-        except OpenShiftCheckException as e:
+        except OpenShiftCheckException as exc:
             result["failed"] = True
-            result["msg"] = str(e)
+            result["msg"] = str(exc)
+            return result
+
+        if "openshift" not in task_vars:
+            result["failed"] = True
+            result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?"
             return result
 
         result["checks"] = check_results = {}
 
         user_disabled_checks = normalize(task_vars.get('openshift_disable_check', []))
 
-        for check_name in resolved_checks:
-            display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"]))
-            check = known_checks[check_name]
-
-            if not check.is_active():
-                r = dict(skipped=True, skipped_reason="Not active for this host")
-            elif check_name in user_disabled_checks:
-                r = dict(skipped=True, skipped_reason="Disabled by user request")
-            else:
-                try:
-                    r = check.run()
-                except OpenShiftCheckException as e:
-                    r = dict(
-                        failed=True,
-                        msg=str(e),
-                    )
-
+        for name in resolved_checks:
+            display.banner("CHECK [{} : {}]".format(name, task_vars["ansible_host"]))
+            check = known_checks[name]
+            check_results[name] = run_check(name, check, user_disabled_checks)
             if check.changed:
-                r["changed"] = True
-            check_results[check_name] = r
+                check_results[name]["changed"] = True
 
         result["changed"] = any(r.get("changed") for r in check_results.values())
         if any(r.get("failed") for r in check_results.values()):
@@ -80,22 +78,55 @@ class ActionModule(ActionBase):
         return result
 
     def load_known_checks(self, tmp, task_vars):
+        """Find all existing checks and return a mapping of names to instances."""
         load_checks()
 
         known_checks = {}
         for cls in OpenShiftCheck.subclasses():
-            check_name = cls.name
-            if check_name in known_checks:
-                other_cls = known_checks[check_name].__class__
+            name = cls.name
+            if name in known_checks:
+                other_cls = known_checks[name].__class__
                 raise OpenShiftCheckException(
-                    "non-unique check name '{}' in: '{}.{}' and '{}.{}'".format(
-                        check_name,
-                        cls.__module__, cls.__name__,
-                        other_cls.__module__, other_cls.__name__))
-            known_checks[check_name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars)
+                    "duplicate check name '{}' in: '{}' and '{}'"
+                    "".format(name, full_class_name(cls), full_class_name(other_cls))
+                )
+            known_checks[name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars)
         return known_checks
 
 
+def list_known_checks(known_checks):
+    """Return text listing the existing checks and tags."""
+    # TODO: we could include a description of each check by taking it from a
+    # check class attribute (e.g., __doc__) when building the message below.
+    msg = (
+        'This playbook is meant to run health checks, but no checks were '
+        'requested. Set the `openshift_checks` variable to a comma-separated '
+        'list of check names or a YAML list. Available checks:\n  {}'
+    ).format('\n  '.join(sorted(known_checks)))
+
+    tags = describe_tags(known_checks.values())
+
+    msg += (
+        '\n\nTags can be used as a shortcut to select multiple '
+        'checks. Available tags and the checks they select:\n  {}'
+    ).format('\n  '.join(tags))
+
+    return msg
+
+
+def describe_tags(check_classes):
+    """Return a sorted list of strings describing tags and the checks they include."""
+    tag_checks = defaultdict(list)
+    for cls in check_classes:
+        for tag in cls.tags:
+            tag_checks[tag].append(cls.name)
+    tags = [
+        '@{} = {}'.format(tag, ','.join(sorted(checks)))
+        for tag, checks in tag_checks.items()
+    ]
+    return sorted(tags)
+
+
 def resolve_checks(names, all_checks):
     """Returns a set of resolved check names.
 
@@ -123,6 +154,12 @@ def resolve_checks(names, all_checks):
         if unknown_tag_names:
             msg.append('Unknown tag names: {}.'.format(', '.join(sorted(unknown_tag_names))))
         msg.append('Make sure there is no typo in the playbook and no files are missing.')
+        # TODO: implement a "Did you mean ...?" when the input is similar to a
+        # valid check or tag.
+        msg.append('Known checks:')
+        msg.append('  {}'.format('\n  '.join(sorted(known_check_names))))
+        msg.append('Known tags:')
+        msg.append('  {}'.format('\n  '.join(describe_tags(all_checks))))
         raise OpenShiftCheckException('\n'.join(msg))
 
     tag_to_checks = defaultdict(set)
@@ -146,3 +183,32 @@ def normalize(checks):
     if isinstance(checks, string_types):
         checks = checks.split(',')
     return [name.strip() for name in checks if name.strip()]
+
+
+def run_check(name, check, user_disabled_checks):
+    """Run a single check if enabled and return a result dict."""
+    if name in user_disabled_checks or '*' in user_disabled_checks:
+        return dict(skipped=True, skipped_reason="Disabled by user request")
+
+    # pylint: disable=broad-except; capturing exceptions broadly is intentional,
+    # to isolate arbitrary failures in one check from others.
+    try:
+        is_active = check.is_active()
+    except Exception as exc:
+        reason = "Could not determine if check should be run, exception: {}".format(exc)
+        return dict(skipped=True, skipped_reason=reason, exception=traceback.format_exc())
+
+    if not is_active:
+        return dict(skipped=True, skipped_reason="Not active for this host")
+
+    try:
+        return check.run()
+    except OpenShiftCheckException as exc:
+        return dict(failed=True, msg=str(exc))
+    except Exception as exc:
+        return dict(failed=True, msg=str(exc), exception=traceback.format_exc())
+
+
+def full_class_name(cls):
+    """Return the name of a class prefixed with its module name."""
+    return '{}.{}'.format(cls.__module__, cls.__name__)
diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
index d10200719..dcaf87eca 100644
--- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
+++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
@@ -1,161 +1,235 @@
-"""
-Ansible callback plugin to give a nicely formatted summary of failures.
-"""
+"""Ansible callback plugin to print a nicely formatted summary of failures.
 
-# Reason: In several locations below we disable pylint protected-access
-#         for Ansible objects that do not give us any public way
-#         to access the full details we need to report check failures.
-# Status: disabled permanently or until Ansible object has a public API.
-# This does leave the code more likely to be broken by future Ansible changes.
+The file / module name is prefixed with `zz_` to make this plugin be loaded last
+by Ansible, thus making its output the last thing that users see.
+"""
 
-from pprint import pformat
+from collections import defaultdict
+import traceback
 
 from ansible.plugins.callback import CallbackBase
 from ansible import constants as C
 from ansible.utils.color import stringc
+from ansible.module_utils.six import string_types
+
+
+FAILED_NO_MSG = u'Failed without returning a message.'
 
 
 class CallbackModule(CallbackBase):
-    """
-    This callback plugin stores task results and summarizes failures.
-    The file name is prefixed with `zz_` to make this plugin be loaded last by
-    Ansible, thus making its output the last thing that users see.
-    """
+    """This callback plugin stores task results and summarizes failures."""
 
     CALLBACK_VERSION = 2.0
     CALLBACK_TYPE = 'aggregate'
     CALLBACK_NAME = 'failure_summary'
     CALLBACK_NEEDS_WHITELIST = False
-    _playbook_file = None
 
     def __init__(self):
         super(CallbackModule, self).__init__()
         self.__failures = []
+        self.__playbook_file = ''
 
     def v2_playbook_on_start(self, playbook):
         super(CallbackModule, self).v2_playbook_on_start(playbook)
-        # re: playbook attrs see top comment  # pylint: disable=protected-access
-        self._playbook_file = playbook._file_name
+        # pylint: disable=protected-access; Ansible gives us no public API to
+        # get the file name of the current playbook from a callback plugin.
+        self.__playbook_file = playbook._file_name
 
     def v2_runner_on_failed(self, result, ignore_errors=False):
         super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors)
         if not ignore_errors:
-            self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
+            self.__failures.append(result)
 
     def v2_playbook_on_stats(self, stats):
         super(CallbackModule, self).v2_playbook_on_stats(stats)
-        if self.__failures:
-            self._print_failure_details(self.__failures)
-
-    def _print_failure_details(self, failures):
-        """Print a summary of failed tasks or checks."""
-        self._display.display(u'\nFailure summary:\n')
-
-        width = len(str(len(failures)))
-        initial_indent_format = u'  {{:>{width}}}. '.format(width=width)
-        initial_indent_len = len(initial_indent_format.format(0))
-        subsequent_indent = u' ' * initial_indent_len
-        subsequent_extra_indent = u' ' * (initial_indent_len + 10)
-
-        for i, failure in enumerate(failures, 1):
-            entries = _format_failure(failure)
-            self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), entries[0]))
-            for entry in entries[1:]:
-                entry = entry.replace(u'\n', u'\n' + subsequent_extra_indent)
-                indented = u'{}{}'.format(subsequent_indent, entry)
-                self._display.display(indented)
-
-        failed_checks = set()
-        playbook_context = None
-        # re: result attrs see top comment  # pylint: disable=protected-access
-        for failure in failures:
-            # Get context from check task result since callback plugins cannot access task vars.
-            # NOTE: thus context is not known unless checks run. Failures prior to checks running
-            # don't have playbook_context in the results. But we only use it now when checks fail.
-            playbook_context = playbook_context or failure['result']._result.get('playbook_context')
-            failed_checks.update(
-                name
-                for name, result in failure['result']._result.get('checks', {}).items()
-                if result.get('failed')
-            )
-        if failed_checks:
-            self._print_check_failure_summary(failed_checks, playbook_context)
-
-    def _print_check_failure_summary(self, failed_checks, context):
-        checks = ','.join(sorted(failed_checks))
-        # The purpose of specifying context is to vary the output depending on what the user was
-        # expecting to happen (based on which playbook they ran). The only use currently is to
-        # vary the message depending on whether the user was deliberately running checks or was
-        # trying to install/upgrade and checks are just included. Other use cases may arise.
-        summary = (  # default to explaining what checks are in the first place
-            '\n'
-            'The execution of "{playbook}"\n'
-            'includes checks designed to fail early if the requirements\n'
-            'of the playbook are not met. One or more of these checks\n'
-            'failed. To disregard these results, you may choose to\n'
-            'disable failing checks by setting an Ansible variable:\n\n'
-            '   openshift_disable_check={checks}\n\n'
-            'Failing check names are shown in the failure details above.\n'
-            'Some checks may be configurable by variables if your requirements\n'
-            'are different from the defaults; consult check documentation.\n'
-            'Variables can be set in the inventory or passed on the\n'
-            'command line using the -e flag to ansible-playbook.\n\n'
-        ).format(playbook=self._playbook_file, checks=checks)
-        if context in ['pre-install', 'health']:
-            summary = (  # user was expecting to run checks, less explanation needed
-                '\n'
-                'You may choose to configure or disable failing checks by\n'
-                'setting Ansible variables. To disable those above:\n\n'
-                '    openshift_disable_check={checks}\n\n'
-                'Consult check documentation for configurable variables.\n'
-                'Variables can be set in the inventory or passed on the\n'
-                'command line using the -e flag to ansible-playbook.\n\n'
-            ).format(checks=checks)
-        self._display.display(summary)
-
-
-# re: result attrs see top comment  # pylint: disable=protected-access
-def _format_failure(failure):
+        # pylint: disable=broad-except; capturing exceptions broadly is
+        # intentional, to isolate arbitrary failures in this callback plugin.
+        try:
+            if self.__failures:
+                self._display.display(failure_summary(self.__failures, self.__playbook_file))
+        except Exception:
+            msg = stringc(
+                u'An error happened while generating a summary of failures:\n'
+                u'{}'.format(traceback.format_exc()), C.COLOR_WARN)
+            self._display.v(msg)
+
+
+def failure_summary(failures, playbook):
+    """Return a summary of failed tasks, including details on health checks."""
+    if not failures:
+        return u''
+
+    # NOTE: because we don't have access to task_vars from callback plugins, we
+    # store the playbook context in the task result when the
+    # openshift_health_check action plugin is used, and we use this context to
+    # customize the error message.
+    # pylint: disable=protected-access; Ansible gives us no sufficient public
+    # API on TaskResult objects.
+    context = next((
+        context for context in
+        (failure._result.get('playbook_context') for failure in failures)
+        if context
+    ), None)
+
+    failures = [failure_to_dict(failure) for failure in failures]
+    failures = deduplicate_failures(failures)
+
+    summary = [u'', u'', u'Failure summary:', u'']
+
+    width = len(str(len(failures)))
+    initial_indent_format = u'  {{:>{width}}}. '.format(width=width)
+    initial_indent_len = len(initial_indent_format.format(0))
+    subsequent_indent = u' ' * initial_indent_len
+    subsequent_extra_indent = u' ' * (initial_indent_len + 10)
+
+    for i, failure in enumerate(failures, 1):
+        entries = format_failure(failure)
+        summary.append(u'\n{}{}'.format(initial_indent_format.format(i), entries[0]))
+        for entry in entries[1:]:
+            entry = entry.replace(u'\n', u'\n' + subsequent_extra_indent)
+            indented = u'{}{}'.format(subsequent_indent, entry)
+            summary.append(indented)
+
+    failed_checks = set()
+    for failure in failures:
+        failed_checks.update(name for name, message in failure['checks'])
+    if failed_checks:
+        summary.append(check_failure_footer(failed_checks, context, playbook))
+
+    return u'\n'.join(summary)
+
+
+def failure_to_dict(failed_task_result):
+    """Extract information out of a failed TaskResult into a dict.
+
+    The intent is to transform a TaskResult object into something easier to
+    manipulate. TaskResult is ansible.executor.task_result.TaskResult.
+    """
+    # pylint: disable=protected-access; Ansible gives us no sufficient public
+    # API on TaskResult objects.
+    _result = failed_task_result._result
+    return {
+        'host': failed_task_result._host.get_name(),
+        'play': play_name(failed_task_result._task),
+        'task': failed_task_result.task_name,
+        'msg': _result.get('msg', FAILED_NO_MSG),
+        'checks': tuple(
+            (name, result.get('msg', FAILED_NO_MSG))
+            for name, result in sorted(_result.get('checks', {}).items())
+            if result.get('failed')
+        ),
+    }
+
+
+def play_name(obj):
+    """Given a task or block, return the name of its parent play.
+
+    This is loosely inspired by ansible.playbook.base.Base.dump_me.
+    """
+    # pylint: disable=protected-access; Ansible gives us no sufficient public
+    # API to implement this.
+    if not obj:
+        return ''
+    if hasattr(obj, '_play'):
+        return obj._play.get_name()
+    return play_name(getattr(obj, '_parent'))
+
+
+def deduplicate_failures(failures):
+    """Group together similar failures from different hosts.
+
+    Returns a new list of failures such that identical failures from different
+    hosts are grouped together in a single entry. The relative order of failures
+    is preserved.
+
+    If failures is unhashable, the original list of failures is returned.
+    """
+    groups = defaultdict(list)
+    for failure in failures:
+        group_key = tuple(sorted((key, value) for key, value in failure.items() if key != 'host'))
+        try:
+            groups[group_key].append(failure)
+        except TypeError:
+            # abort and return original list of failures when failures has an
+            # unhashable type.
+            return failures
+
+    result = []
+    for failure in failures:
+        group_key = tuple(sorted((key, value) for key, value in failure.items() if key != 'host'))
+        if group_key not in groups:
+            continue
+        failure['host'] = tuple(sorted(g_failure['host'] for g_failure in groups.pop(group_key)))
+        result.append(failure)
+    return result
+
+
+def format_failure(failure):
     """Return a list of pretty-formatted text entries describing a failure, including
     relevant information about it. Expect that the list of text entries will be joined
     by a newline separator when output to the user."""
-    result = failure['result']
-    host = result._host.get_name()
-    play = _get_play(result._task)
-    if play:
-        play = play.get_name()
-    task = result._task.get_name()
-    msg = result._result.get('msg', u'???')
+    if isinstance(failure['host'], string_types):
+        host = failure['host']
+    else:
+        host = u', '.join(failure['host'])
+    play = failure['play']
+    task = failure['task']
+    msg = failure['msg']
+    checks = failure['checks']
     fields = (
-        (u'Host', host),
+        (u'Hosts', host),
         (u'Play', play),
         (u'Task', task),
         (u'Message', stringc(msg, C.COLOR_ERROR)),
     )
-    if 'checks' in result._result:
-        fields += ((u'Details', _format_failed_checks(result._result['checks'])),)
+    if checks:
+        fields += ((u'Details', format_failed_checks(checks)),)
     row_format = '{:10}{}'
     return [row_format.format(header + u':', body) for header, body in fields]
 
 
-def _format_failed_checks(checks):
+def format_failed_checks(checks):
     """Return pretty-formatted text describing checks that failed."""
-    failed_check_msgs = []
-    for check, body in checks.items():
-        if body.get('failed', False):   # only show the failed checks
-            msg = body.get('msg', u"Failed without returning a message")
-            failed_check_msgs.append('check "%s":\n%s' % (check, msg))
-    if failed_check_msgs:
-        return stringc("\n\n".join(failed_check_msgs), C.COLOR_ERROR)
-    else:    # something failed but no checks will admit to it, so dump everything
-        return stringc(pformat(checks), C.COLOR_ERROR)
-
-
-# This is inspired by ansible.playbook.base.Base.dump_me.
-# re: play/task/block attrs see top comment  # pylint: disable=protected-access
-def _get_play(obj):
-    """Given a task or block, recursively try to find its parent play."""
-    if hasattr(obj, '_play'):
-        return obj._play
-    if getattr(obj, '_parent'):
-        return _get_play(obj._parent)
+    messages = []
+    for name, message in checks:
+        messages.append(u'check "{}":\n{}'.format(name, message))
+    return stringc(u'\n\n'.join(messages), C.COLOR_ERROR)
+
+
+def check_failure_footer(failed_checks, context, playbook):
+    """Return a textual explanation about checks depending on context.
+
+    The purpose of specifying context is to vary the output depending on what
+    the user was expecting to happen (based on which playbook they ran). The
+    only use currently is to vary the message depending on whether the user was
+    deliberately running checks or was trying to install/upgrade and checks are
+    just included. Other use cases may arise.
+    """
+    checks = ','.join(sorted(failed_checks))
+    summary = [u'']
+    if context in ['pre-install', 'health', 'adhoc']:
+        # User was expecting to run checks, less explanation needed.
+        summary.extend([
+            u'You may configure or disable checks by setting Ansible '
+            u'variables. To disable those above, set:',
+            u'    openshift_disable_check={checks}'.format(checks=checks),
+            u'Consult check documentation for configurable variables.',
+        ])
+    else:
+        # User may not be familiar with the checks, explain what checks are in
+        # the first place.
+        summary.extend([
+            u'The execution of "{playbook}" includes checks designed to fail '
+            u'early if the requirements of the playbook are not met. One or '
+            u'more of these checks failed. To disregard these results,'
+            u'explicitly disable checks by setting an Ansible variable:'.format(playbook=playbook),
+            u'   openshift_disable_check={checks}'.format(checks=checks),
+            u'Failing check names are shown in the failure details above. '
+            u'Some checks may be configurable by variables if your requirements '
+            u'are different from the defaults; consult check documentation.',
+        ])
+    summary.append(
+        u'Variables can be set in the inventory or passed on the command line '
+        u'using the -e flag to ansible-playbook.'
+    )
+    return u'\n'.join(summary)
diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py
index f9babebb9..db3c0b654 100644
--- a/roles/openshift_health_checker/library/aos_version.py
+++ b/roles/openshift_health_checker/library/aos_version.py
@@ -24,11 +24,17 @@ from ansible.module_utils.basic import AnsibleModule
 # Python 3, we use six for cross compatibility in this module alone:
 from ansible.module_utils.six import string_types
 
-IMPORT_EXCEPTION = None
+YUM_IMPORT_EXCEPTION = None
+DNF_IMPORT_EXCEPTION = None
 try:
     import yum  # pylint: disable=import-error
 except ImportError as err:
-    IMPORT_EXCEPTION = err
+    YUM_IMPORT_EXCEPTION = err
+
+try:
+    import dnf  # pylint: disable=import-error
+except ImportError as err:
+    DNF_IMPORT_EXCEPTION = err
 
 
 class AosVersionException(Exception):
@@ -43,12 +49,20 @@ def main():
     module = AnsibleModule(
         argument_spec=dict(
             package_list=dict(type="list", required=True),
+            package_mgr=dict(type="str", required=True),
         ),
         supports_check_mode=True
     )
 
-    if IMPORT_EXCEPTION:
-        module.fail_json(msg="aos_version module could not import yum: %s" % IMPORT_EXCEPTION)
+    # determine the package manager to use
+    package_mgr = module.params['package_mgr']
+    if package_mgr not in ('yum', 'dnf'):
+        module.fail_json(msg="package_mgr must be one of: yum, dnf")
+    pkg_mgr_exception = dict(yum=YUM_IMPORT_EXCEPTION, dnf=DNF_IMPORT_EXCEPTION)[package_mgr]
+    if pkg_mgr_exception:
+        module.fail_json(
+            msg="aos_version module could not import {}: {}".format(package_mgr, pkg_mgr_exception)
+        )
 
     # determine the packages we will look for
     package_list = module.params['package_list']
@@ -67,7 +81,7 @@ def main():
 
     # get the list of packages available and complain if anything is wrong
     try:
-        pkgs = _retrieve_available_packages(expected_pkg_names)
+        pkgs = _retrieve_available_packages(package_mgr, expected_pkg_names)
         if versioned_pkgs:
             _check_precise_version_found(pkgs, _to_dict(versioned_pkgs))
             _check_higher_version_found(pkgs, _to_dict(versioned_pkgs))
@@ -82,10 +96,7 @@ def _to_dict(pkg_list):
     return {pkg["name"]: pkg for pkg in pkg_list}
 
 
-def _retrieve_available_packages(expected_pkgs):
-    # search for package versions available for openshift pkgs
-    yb = yum.YumBase()  # pylint: disable=invalid-name
-
+def _retrieve_available_packages(pkg_mgr, expected_pkgs):
     # The openshift excluder prevents unintended updates to openshift
     # packages by setting yum excludes on those packages. See:
     # https://wiki.centos.org/SpecialInterestGroup/PaaS/OpenShift-Origin-Control-Updates
@@ -94,17 +105,45 @@ def _retrieve_available_packages(expected_pkgs):
     # attempt to determine what packages are available via yum they may
     # be excluded. So, for our purposes here, disable excludes to see
     # what will really be available during an install or upgrade.
-    yb.conf.disable_excludes = ['all']
 
-    try:
-        pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs)
-    except yum.Errors.PackageSackError as excinfo:
-        # you only hit this if *none* of the packages are available
-        raise AosVersionException('\n'.join([
-            'Unable to find any OpenShift packages.',
-            'Check your subscription and repo settings.',
-            str(excinfo),
-        ]))
+    if pkg_mgr == "yum":
+        # search for package versions available for openshift pkgs
+        yb = yum.YumBase()  # pylint: disable=invalid-name
+
+        yb.conf.disable_excludes = ['all']
+
+        try:
+            pkgs = yb.rpmdb.returnPackages(patterns=expected_pkgs)
+            pkgs += yb.pkgSack.returnPackages(patterns=expected_pkgs)
+        except yum.Errors.PackageSackError as excinfo:
+            # you only hit this if *none* of the packages are available
+            raise AosVersionException('\n'.join([
+                'Unable to find any OpenShift packages.',
+                'Check your subscription and repo settings.',
+                str(excinfo),
+            ]))
+    elif pkg_mgr == "dnf":
+        dbase = dnf.Base()  # pyling: disable=invalid-name
+
+        dbase.conf.disable_excludes = ['all']
+        dbase.read_all_repos()
+        dbase.fill_sack(load_system_repo=False, load_available_repos=True)
+
+        dquery = dbase.sack.query()
+        aquery = dquery.available()
+        iquery = dquery.installed()
+
+        available_pkgs = list(aquery.filter(name=expected_pkgs))
+        installed_pkgs = list(iquery.filter(name=expected_pkgs))
+        pkgs = available_pkgs + installed_pkgs
+
+        if not pkgs:
+            # pkgs list is empty, raise because no expected packages found
+            raise AosVersionException('\n'.join([
+                'Unable to find any OpenShift packages.',
+                'Check your subscription and repo settings.',
+            ]))
+
     return pkgs
 
 
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 07ec6f7ef..987c955b6 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -4,6 +4,7 @@ Health checks for OpenShift clusters.
 
 import operator
 import os
+import time
 
 from abc import ABCMeta, abstractmethod, abstractproperty
 from importlib import import_module
@@ -57,6 +58,9 @@ class OpenShiftCheck(object):
         self._execute_module = execute_module
         self.task_vars = task_vars or {}
         self.tmp = tmp
+        # mainly for testing purposes; see execute_module_with_retries
+        self._module_retries = 3
+        self._module_retry_interval = 5  # seconds
 
         # set to True when the check changes the host, for accurate total "changed" count
         self.changed = False
@@ -115,6 +119,19 @@ class OpenShiftCheck(object):
             )
         return self._execute_module(module_name, module_args, self.tmp, self.task_vars)
 
+    def execute_module_with_retries(self, module_name, module_args):
+        """Run execute_module and retry on failure."""
+        result = {}
+        tries = 0
+        while True:
+            res = self.execute_module(module_name, module_args)
+            if tries > self._module_retries or not res.get("failed"):
+                result.update(res)
+                return result
+            result["last_failed"] = res
+            tries += 1
+            time.sleep(self._module_retry_interval)
+
     def get_var(self, *keys, **kwargs):
         """Get deeply nested values from task_vars.
 
@@ -242,7 +259,7 @@ def load_checks(path=None, subpkg=""):
             modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name)
             continue
 
-        if name.endswith(".py") and name not in LOADER_EXCLUDES:
+        if name.endswith(".py") and not name.startswith(".") and name not in LOADER_EXCLUDES:
             modules.append(import_module(__package__ + subpkg + "." + name[:-3]))
 
     return modules
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 6d1dea9ce..f302fd14b 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -115,10 +115,7 @@ class DiskAvailability(OpenShiftCheck):
 
                 return {
                     'failed': True,
-                    'msg': (
-                        'Available disk space in "{}" ({:.1f} GB) '
-                        'is below minimum recommended ({:.1f} GB)'
-                    ).format(path, free_gb, recommended_gb)
+                    'msg': msg,
                 }
 
         return {}
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 85a922f86..9c35f0f92 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -32,6 +32,12 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
     # we use python-docker-py to check local docker for images, and skopeo
     # to look for images available remotely without waiting to pull them.
     dependencies = ["python-docker-py", "skopeo"]
+    skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false docker://{registry}/{image}"
+
+    def __init__(self, *args, **kwargs):
+        super(DockerImageAvailability, self).__init__(*args, **kwargs)
+        # record whether we could reach a registry or not (and remember results)
+        self.reachable_registries = {}
 
     def is_active(self):
         """Skip hosts with unsupported deployment types."""
@@ -63,13 +69,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
         unavailable_images = set(missing_images) - set(available_images)
 
         if unavailable_images:
-            return {
-                "failed": True,
-                "msg": (
-                    "One or more required Docker images are not available:\n    {}\n"
-                    "Configured registries: {}"
-                ).format(",\n    ".join(sorted(unavailable_images)), ", ".join(registries)),
-            }
+            registries = [
+                reg if self.reachable_registries.get(reg, True) else reg + " (unreachable)"
+                for reg in registries
+            ]
+            msg = (
+                "One or more required Docker images are not available:\n    {}\n"
+                "Configured registries: {}\n"
+                "Checked by: {}"
+            ).format(
+                ",\n    ".join(sorted(unavailable_images)),
+                ", ".join(registries),
+                self.skopeo_img_check_command
+            )
+
+            return dict(failed=True, msg=msg)
 
         return {}
 
@@ -125,31 +139,31 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
 
     def local_images(self, images):
         """Filter a list of images and return those available locally."""
-        return [
-            image for image in images
-            if self.is_image_local(image)
-        ]
+        registries = self.known_docker_registries()
+        found_images = []
+        for image in images:
+            # docker could have the image name as-is or prefixed with any registry
+            imglist = [image] + [reg + "/" + image for reg in registries]
+            if self.is_image_local(imglist):
+                found_images.append(image)
+        return found_images
 
     def is_image_local(self, image):
         """Check if image is already in local docker index."""
         result = self.execute_module("docker_image_facts", {"name": image})
-        if result.get("failed", False):
-            return False
-
-        return bool(result.get("images", []))
+        return bool(result.get("images")) and not result.get("failed")
 
     def known_docker_registries(self):
         """Build a list of docker registries available according to inventory vars."""
-        docker_facts = self.get_var("openshift", "docker")
-        regs = set(docker_facts["additional_registries"])
+        regs = list(self.get_var("openshift.docker.additional_registries", default=[]))
 
         deployment_type = self.get_var("openshift_deployment_type")
-        if deployment_type == "origin":
-            regs.update(["docker.io"])
-        elif "enterprise" in deployment_type:
-            regs.update(["registry.access.redhat.com"])
+        if deployment_type == "origin" and "docker.io" not in regs:
+            regs.append("docker.io")
+        elif "enterprise" in deployment_type and "registry.access.redhat.com" not in regs:
+            regs.append("registry.access.redhat.com")
 
-        return list(regs)
+        return regs
 
     def available_images(self, images, default_registries):
         """Search remotely for images. Returns: list of images found."""
@@ -162,15 +176,35 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
         """Use Skopeo to determine if required image exists in known registry(s)."""
         registries = default_registries
 
-        # if image already includes a registry, only use that
+        # If image already includes a registry, only use that.
+        # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g.
+        # registry.access.redhat.com/rhel7 as if the registry were a namespace.
+        # It's not clear that there's any way to distinguish them, but fortunately
+        # the current set of images all look like [registry/]namespace/name[:version].
         if image.count("/") > 1:
             registry, image = image.split("/", 1)
             registries = [registry]
 
         for registry in registries:
-            args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)}
-            result = self.execute_module("command", args)
+            if registry not in self.reachable_registries:
+                self.reachable_registries[registry] = self.connect_to_registry(registry)
+            if not self.reachable_registries[registry]:
+                continue
+
+            args = {"_raw_params": self.skopeo_img_check_command.format(registry=registry, image=image)}
+            result = self.execute_module_with_retries("command", args)
             if result.get("rc", 0) == 0 and not result.get("failed"):
                 return True
+            if result.get("rc") == 124:  # RC 124 == timed out; mark unreachable
+                self.reachable_registries[registry] = False
 
         return False
+
+    def connect_to_registry(self, registry):
+        """Use ansible wait_for module to test connectivity from host to registry. Returns bool."""
+        # test a simple TCP connection
+        host, _, port = registry.partition(":")
+        port = port or 443
+        args = dict(host=host, port=port, state="started", timeout=30)
+        result = self.execute_module("wait_for", args)
+        return result.get("rc", 0) == 0 and not result.get("failed")
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index e9bae60a3..24f1d938a 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -36,7 +36,7 @@ class DockerHostMixin(object):
 
         # NOTE: we would use the "package" module but it's actually an action plugin
         # and it's not clear how to invoke one of those. This is about the same anyway:
-        result = self.execute_module(
+        result = self.execute_module_with_retries(
             self.get_var("ansible_pkg_mgr", default="yum"),
             {"name": self.dependencies, "state": "present"},
         )
diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py
index a86180b00..21355c2f0 100644
--- a/roles/openshift_health_checker/openshift_checks/package_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/package_availability.py
@@ -26,7 +26,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
             packages.update(self.node_packages(rpm_prefix))
 
         args = {"packages": sorted(set(packages))}
-        return self.execute_module("check_yum_update", args)
+        return self.execute_module_with_retries("check_yum_update", args)
 
     @staticmethod
     def master_packages(rpm_prefix):
diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py
index 1e9aecbe0..8464e8a5e 100644
--- a/roles/openshift_health_checker/openshift_checks/package_update.py
+++ b/roles/openshift_health_checker/openshift_checks/package_update.py
@@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):
 
     def run(self):
         args = {"packages": []}
-        return self.execute_module("check_yum_update", args)
+        return self.execute_module_with_retries("check_yum_update", args)
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 8b780114f..d4aec3ed8 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -46,6 +46,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
         check_multi_minor_release = deployment_type in ['openshift-enterprise']
 
         args = {
+            "package_mgr": self.get_var("ansible_pkg_mgr"),
             "package_list": [
                 {
                     "name": "openvswitch",
@@ -75,7 +76,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
             ],
         }
 
-        return self.execute_module("aos_version", args)
+        return self.execute_module_with_retries("aos_version", args)
 
     def get_required_ovs_version(self):
         """Return the correct Open vSwitch version(s) for the current OpenShift version."""
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index f5161d6f5..58864da21 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -80,7 +80,8 @@ def skipped(result):
     None,
     {},
 ])
-def test_action_plugin_missing_openshift_facts(plugin, task_vars):
+def test_action_plugin_missing_openshift_facts(plugin, task_vars, monkeypatch):
+    monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
     result = plugin.run(tmp=None, task_vars=task_vars)
 
     assert failed(result, msg_has=['openshift_facts'])
@@ -94,7 +95,7 @@ def test_action_plugin_cannot_load_checks_with_the_same_name(plugin, task_vars,
 
     result = plugin.run(tmp=None, task_vars=task_vars)
 
-    assert failed(result, msg_has=['unique', 'duplicate_name', 'FakeCheck'])
+    assert failed(result, msg_has=['duplicate', 'duplicate_name', 'FakeCheck'])
 
 
 def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch):
@@ -109,11 +110,16 @@ def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch):
     assert not skipped(result)
 
 
-def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch):
+@pytest.mark.parametrize('to_disable', [
+    'fake_check',
+    ['fake_check', 'spam'],
+    '*,spam,eggs',
+])
+def test_action_plugin_skip_disabled_checks(to_disable, plugin, task_vars, monkeypatch):
     checks = [fake_check('fake_check', is_active=True)]
     monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks))
 
-    task_vars['openshift_disable_check'] = 'fake_check'
+    task_vars['openshift_disable_check'] = to_disable
     result = plugin.run(tmp=None, task_vars=task_vars)
 
     assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Disabled by user request")
@@ -217,24 +223,21 @@ def test_resolve_checks_ok(names, all_checks, expected):
     assert resolve_checks(names, all_checks) == expected
 
 
-@pytest.mark.parametrize('names,all_checks,words_in_exception,words_not_in_exception', [
+@pytest.mark.parametrize('names,all_checks,words_in_exception', [
     (
         ['testA', 'testB'],
         [],
         ['check', 'name', 'testA', 'testB'],
-        ['tag', 'group', '@'],
     ),
     (
         ['@group'],
         [],
         ['tag', 'name', 'group'],
-        ['check', '@'],
     ),
     (
         ['testA', 'testB', '@group'],
         [],
         ['check', 'name', 'testA', 'testB', 'tag', 'group'],
-        ['@'],
     ),
     (
         ['testA', 'testB', '@group'],
@@ -244,13 +247,10 @@ def test_resolve_checks_ok(names, all_checks, expected):
             fake_check('from_group_2', ['preflight', 'group']),
         ],
         ['check', 'name', 'testA', 'testB'],
-        ['tag', 'group', '@'],
     ),
 ])
-def test_resolve_checks_failure(names, all_checks, words_in_exception, words_not_in_exception):
+def test_resolve_checks_failure(names, all_checks, words_in_exception):
     with pytest.raises(Exception) as excinfo:
         resolve_checks(names, all_checks)
     for word in words_in_exception:
         assert word in str(excinfo.value)
-    for word in words_not_in_exception:
-        assert word not in str(excinfo.value)
diff --git a/roles/openshift_health_checker/test/conftest.py b/roles/openshift_health_checker/test/conftest.py
index 3cbd65507..244a1f0fa 100644
--- a/roles/openshift_health_checker/test/conftest.py
+++ b/roles/openshift_health_checker/test/conftest.py
@@ -7,5 +7,6 @@ openshift_health_checker_path = os.path.dirname(os.path.dirname(__file__))
 sys.path[1:1] = [
     openshift_health_checker_path,
     os.path.join(openshift_health_checker_path, 'action_plugins'),
+    os.path.join(openshift_health_checker_path, 'callback_plugins'),
     os.path.join(openshift_health_checker_path, 'library'),
 ]
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 8d0a53df9..6a7c16c7e 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -3,6 +3,23 @@ import pytest
 from openshift_checks.docker_image_availability import DockerImageAvailability
 
 
+@pytest.fixture()
+def task_vars():
+    return dict(
+        openshift=dict(
+            common=dict(
+                service_type='origin',
+                is_containerized=False,
+                is_atomic=False,
+            ),
+            docker=dict(),
+        ),
+        openshift_deployment_type='origin',
+        openshift_image_tag='',
+        group_names=['nodes', 'masters'],
+    )
+
+
 @pytest.mark.parametrize('deployment_type, is_containerized, group_names, expect_active', [
     ("origin", True, [], True),
     ("openshift-enterprise", True, [], True),
@@ -15,12 +32,10 @@ from openshift_checks.docker_image_availability import DockerImageAvailability
     ("origin", False, ["nodes", "masters"], True),
     ("openshift-enterprise", False, ["etcd"], False),
 ])
-def test_is_active(deployment_type, is_containerized, group_names, expect_active):
-    task_vars = dict(
-        openshift=dict(common=dict(is_containerized=is_containerized)),
-        openshift_deployment_type=deployment_type,
-        group_names=group_names,
-    )
+def test_is_active(task_vars, deployment_type, is_containerized, group_names, expect_active):
+    task_vars['openshift_deployment_type'] = deployment_type
+    task_vars['openshift']['common']['is_containerized'] = is_containerized
+    task_vars['group_names'] = group_names
     assert DockerImageAvailability(None, task_vars).is_active() == expect_active
 
 
@@ -30,10 +45,10 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active
     (True, False),
     (False, True),
 ])
-def test_all_images_available_locally(is_containerized, is_atomic):
+def test_all_images_available_locally(task_vars, is_containerized, is_atomic):
     def execute_module(module_name, module_args, *_):
         if module_name == "yum":
-            return {"changed": True}
+            return {}
 
         assert module_name == "docker_image_facts"
         assert 'name' in module_args
@@ -42,19 +57,9 @@ def test_all_images_available_locally(is_containerized, is_atomic):
             'images': [module_args['name']],
         }
 
-    result = DockerImageAvailability(execute_module, task_vars=dict(
-        openshift=dict(
-            common=dict(
-                service_type='origin',
-                is_containerized=is_containerized,
-                is_atomic=is_atomic,
-            ),
-            docker=dict(additional_registries=["docker.io"]),
-        ),
-        openshift_deployment_type='origin',
-        openshift_image_tag='3.4',
-        group_names=['nodes', 'masters'],
-    )).run()
+    task_vars['openshift']['common']['is_containerized'] = is_containerized
+    task_vars['openshift']['common']['is_atomic'] = is_atomic
+    result = DockerImageAvailability(execute_module, task_vars).run()
 
     assert not result.get('failed', False)
 
@@ -63,53 +68,36 @@ def test_all_images_available_locally(is_containerized, is_atomic):
     False,
     True,
 ])
-def test_all_images_available_remotely(available_locally):
+def test_all_images_available_remotely(task_vars, available_locally):
     def execute_module(module_name, *_):
         if module_name == 'docker_image_facts':
             return {'images': [], 'failed': available_locally}
-        return {'changed': False}
+        return {}
 
-    result = DockerImageAvailability(execute_module, task_vars=dict(
-        openshift=dict(
-            common=dict(
-                service_type='origin',
-                is_containerized=False,
-                is_atomic=False,
-            ),
-            docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]),
-        ),
-        openshift_deployment_type='origin',
-        openshift_image_tag='v3.4',
-        group_names=['nodes', 'masters'],
-    )).run()
+    task_vars['openshift']['docker']['additional_registries'] = ["docker.io", "registry.access.redhat.com"]
+    task_vars['openshift_image_tag'] = 'v3.4'
+    check = DockerImageAvailability(execute_module, task_vars)
+    check._module_retry_interval = 0
+    result = check.run()
 
     assert not result.get('failed', False)
 
 
-def test_all_images_unavailable():
-    def execute_module(module_name=None, *_):
-        if module_name == "command":
-            return {
-                'failed': True,
-            }
+def test_all_images_unavailable(task_vars):
+    def execute_module(module_name=None, *args):
+        if module_name == "wait_for":
+            return {}
+        elif module_name == "command":
+            return {'failed': True}
 
-        return {
-            'changed': False,
-        }
+        return {}  # docker_image_facts failure
 
-    actual = DockerImageAvailability(execute_module, task_vars=dict(
-        openshift=dict(
-            common=dict(
-                service_type='origin',
-                is_containerized=False,
-                is_atomic=False,
-            ),
-            docker=dict(additional_registries=["docker.io"]),
-        ),
-        openshift_deployment_type="openshift-enterprise",
-        openshift_image_tag='latest',
-        group_names=['nodes', 'masters'],
-    )).run()
+    task_vars['openshift']['docker']['additional_registries'] = ["docker.io"]
+    task_vars['openshift_deployment_type'] = "openshift-enterprise"
+    task_vars['openshift_image_tag'] = 'latest'
+    check = DockerImageAvailability(execute_module, task_vars)
+    check._module_retry_interval = 0
+    actual = check.run()
 
     assert actual['failed']
     assert "required Docker images are not available" in actual['msg']
@@ -125,62 +113,63 @@ def test_all_images_unavailable():
         ["dependencies can be installed via `yum`"]
     ),
 ])
-def test_skopeo_update_failure(message, extra_words):
+def test_skopeo_update_failure(task_vars, message, extra_words):
     def execute_module(module_name=None, *_):
         if module_name == "yum":
             return {
                 "failed": True,
                 "msg": message,
-                "changed": False,
             }
 
-        return {'changed': False}
+        return {}
 
-    actual = DockerImageAvailability(execute_module, task_vars=dict(
-        openshift=dict(
-            common=dict(
-                service_type='origin',
-                is_containerized=False,
-                is_atomic=False,
-            ),
-            docker=dict(additional_registries=["unknown.io"]),
-        ),
-        openshift_deployment_type="openshift-enterprise",
-        openshift_image_tag='',
-        group_names=['nodes', 'masters'],
-    )).run()
+    task_vars['openshift']['docker']['additional_registries'] = ["unknown.io"]
+    task_vars['openshift_deployment_type'] = "openshift-enterprise"
+    check = DockerImageAvailability(execute_module, task_vars)
+    check._module_retry_interval = 0
+    actual = check.run()
 
     assert actual["failed"]
     for word in extra_words:
         assert word in actual["msg"]
 
 
-@pytest.mark.parametrize("deployment_type,registries", [
-    ("origin", ["unknown.io"]),
-    ("openshift-enterprise", ["registry.access.redhat.com"]),
-    ("openshift-enterprise", []),
-])
-def test_registry_availability(deployment_type, registries):
+@pytest.mark.parametrize(
+    "image, registries, connection_test_failed, skopeo_failed, "
+    "expect_success, expect_registries_reached", [
+        (
+            "spam/eggs:v1", ["test.reg"],
+            True, True,
+            False,
+            {"test.reg": False},
+        ),
+        (
+            "spam/eggs:v1", ["test.reg"],
+            False, True,
+            False,
+            {"test.reg": True},
+        ),
+        (
+            "eggs.reg/spam/eggs:v1", ["test.reg"],
+            False, False,
+            True,
+            {"eggs.reg": True},
+        ),
+    ])
+def test_registry_availability(image, registries, connection_test_failed, skopeo_failed,
+                               expect_success, expect_registries_reached):
     def execute_module(module_name=None, *_):
-        return {
-            'changed': False,
-        }
+        if module_name == "wait_for":
+            return dict(msg="msg", failed=connection_test_failed)
+        elif module_name == "command":
+            return dict(msg="msg", failed=skopeo_failed)
 
-    actual = DockerImageAvailability(execute_module, task_vars=dict(
-        openshift=dict(
-            common=dict(
-                service_type='origin',
-                is_containerized=False,
-                is_atomic=False,
-            ),
-            docker=dict(additional_registries=registries),
-        ),
-        openshift_deployment_type=deployment_type,
-        openshift_image_tag='',
-        group_names=['nodes', 'masters'],
-    )).run()
+    check = DockerImageAvailability(execute_module, task_vars())
+    check._module_retry_interval = 0
 
-    assert not actual.get("failed", False)
+    available = check.is_available_skopeo_image(image, registries)
+    assert available == expect_success
+    assert expect_registries_reached == check.reachable_registries
 
 
 @pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [
@@ -257,7 +246,7 @@ def test_required_images(deployment_type, is_containerized, groups, oreg_url, ex
         openshift_image_tag='vtest',
     )
 
-    assert expected == DockerImageAvailability("DUMMY", task_vars).required_images()
+    assert expected == DockerImageAvailability(task_vars=task_vars).required_images()
 
 
 def test_containerized_etcd():
@@ -271,4 +260,4 @@ def test_containerized_etcd():
         group_names=['etcd'],
     )
     expected = set(['registry.access.redhat.com/rhel7/etcd'])
-    assert expected == DockerImageAvailability("DUMMY", task_vars).required_images()
+    assert expected == DockerImageAvailability(task_vars=task_vars).required_images()
diff --git a/roles/openshift_health_checker/test/package_availability_test.py b/roles/openshift_health_checker/test/package_availability_test.py
index 1fe648b75..8aa87ca59 100644
--- a/roles/openshift_health_checker/test/package_availability_test.py
+++ b/roles/openshift_health_checker/test/package_availability_test.py
@@ -56,7 +56,7 @@ def test_package_availability(task_vars, must_have_packages, must_not_have_packa
         assert 'packages' in module_args
         assert set(module_args['packages']).issuperset(must_have_packages)
         assert not set(module_args['packages']).intersection(must_not_have_packages)
-        return return_value
+        return {'foo': return_value}
 
     result = PackageAvailability(execute_module, task_vars).run()
-    assert result is return_value
+    assert result['foo'] is return_value
diff --git a/roles/openshift_health_checker/test/package_update_test.py b/roles/openshift_health_checker/test/package_update_test.py
index 06489b0d7..7d9035a36 100644
--- a/roles/openshift_health_checker/test/package_update_test.py
+++ b/roles/openshift_health_checker/test/package_update_test.py
@@ -9,7 +9,7 @@ def test_package_update():
         assert 'packages' in module_args
         # empty list of packages means "generic check if 'yum update' will work"
         assert module_args['packages'] == []
-        return return_value
+        return {'foo': return_value}
 
     result = PackageUpdate(execute_module).run()
-    assert result is return_value
+    assert result['foo'] is return_value
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index 6054d3f3e..8564cd4db 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -5,6 +5,7 @@ from openshift_checks.package_version import PackageVersion, OpenShiftCheckExcep
 
 def task_vars_for(openshift_release, deployment_type):
     return dict(
+        ansible_pkg_mgr='yum',
         openshift=dict(common=dict(service_type=deployment_type)),
         openshift_release=openshift_release,
         openshift_image_tag='v' + openshift_release,
@@ -27,6 +28,7 @@ def test_openshift_version_not_supported():
 
 def test_invalid_openshift_release_format():
     task_vars = dict(
+        ansible_pkg_mgr='yum',
         openshift=dict(common=dict(service_type='origin')),
         openshift_image_tag='v0',
         openshift_deployment_type='origin',
@@ -50,7 +52,7 @@ def test_invalid_openshift_release_format():
 ])
 def test_package_version(openshift_release):
 
-    return_value = object()
+    return_value = {"foo": object()}
 
     def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *_):
         assert module_name == 'aos_version'
@@ -64,7 +66,7 @@ def test_package_version(openshift_release):
 
     check = PackageVersion(execute_module, task_vars_for(openshift_release, 'origin'))
     result = check.run()
-    assert result is return_value
+    assert result == return_value
 
 
 @pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [
@@ -77,7 +79,7 @@ def test_package_version(openshift_release):
 ])
 def test_docker_package_version(deployment_type, openshift_release, expected_docker_version):
 
-    return_value = object()
+    return_value = {"foo": object()}
 
     def execute_module(module_name=None, module_args=None, *_):
         assert module_name == 'aos_version'
@@ -91,7 +93,7 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc
 
     check = PackageVersion(execute_module, task_vars_for(openshift_release, deployment_type))
     result = check.run()
-    assert result is return_value
+    assert result == return_value
 
 
 @pytest.mark.parametrize('group_names,is_containerized,is_active', [
diff --git a/roles/openshift_health_checker/test/zz_failure_summary_test.py b/roles/openshift_health_checker/test/zz_failure_summary_test.py
new file mode 100644
index 000000000..69f27653c
--- /dev/null
+++ b/roles/openshift_health_checker/test/zz_failure_summary_test.py
@@ -0,0 +1,85 @@
+from zz_failure_summary import deduplicate_failures
+
+import pytest
+
+
+@pytest.mark.parametrize('failures,deduplicated', [
+    (
+        [
+            {
+                'host': 'master1',
+                'msg': 'One or more checks failed',
+            },
+        ],
+        [
+            {
+                'host': ('master1',),
+                'msg': 'One or more checks failed',
+            },
+        ],
+    ),
+    (
+        [
+            {
+                'host': 'master1',
+                'msg': 'One or more checks failed',
+            },
+            {
+                'host': 'node1',
+                'msg': 'One or more checks failed',
+            },
+        ],
+        [
+            {
+                'host': ('master1', 'node1'),
+                'msg': 'One or more checks failed',
+            },
+        ],
+    ),
+    (
+        [
+            {
+                'host': 'node1',
+                'msg': 'One or more checks failed',
+                'checks': (('test_check', 'error message'),),
+            },
+            {
+                'host': 'master2',
+                'msg': 'Some error happened',
+            },
+            {
+                'host': 'master1',
+                'msg': 'One or more checks failed',
+                'checks': (('test_check', 'error message'),),
+            },
+        ],
+        [
+            {
+                'host': ('master1', 'node1'),
+                'msg': 'One or more checks failed',
+                'checks': (('test_check', 'error message'),),
+            },
+            {
+                'host': ('master2',),
+                'msg': 'Some error happened',
+            },
+        ],
+    ),
+    # if a failure contain an unhashable value, it will not be deduplicated
+    (
+        [
+            {
+                'host': 'master1',
+                'msg': {'unhashable': 'value'},
+            },
+        ],
+        [
+            {
+                'host': 'master1',
+                'msg': {'unhashable': 'value'},
+            },
+        ],
+    ),
+])
+def test_deduplicate_failures(failures, deduplicated):
+    assert deduplicate_failures(failures) == deduplicated