summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker')
-rw-r--r--roles/openshift_health_checker/action_plugins/openshift_health_check.py15
-rw-r--r--roles/openshift_health_checker/callback_plugins/zz_failure_summary.py41
-rw-r--r--[-rwxr-xr-x]roles/openshift_health_checker/library/aos_version.py34
-rw-r--r--[-rwxr-xr-x]roles/openshift_health_checker/library/check_yum_update.py0
-rw-r--r--roles/openshift_health_checker/library/docker_info.py2
-rw-r--r--roles/openshift_health_checker/library/search_journalctl.py150
-rw-r--r--roles/openshift_health_checker/openshift_checks/__init__.py82
-rw-r--r--roles/openshift_health_checker/openshift_checks/disk_availability.py137
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py205
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_storage.py191
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py29
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_traffic.py44
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_volume.py23
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/curator.py15
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py51
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/fluentd.py49
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/kibana.py37
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging.py42
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py130
-rw-r--r--roles/openshift_health_checker/openshift_checks/memory_availability.py21
-rw-r--r--roles/openshift_health_checker/openshift_checks/mixins.py28
-rw-r--r--roles/openshift_health_checker/openshift_checks/ovs_version.py23
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_availability.py24
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_update.py8
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_version.py43
-rw-r--r--roles/openshift_health_checker/test/action_plugin_test.py27
-rw-r--r--roles/openshift_health_checker/test/aos_version_test.py137
-rw-r--r--roles/openshift_health_checker/test/disk_availability_test.py38
-rw-r--r--roles/openshift_health_checker/test/docker_image_availability_test.py142
-rw-r--r--roles/openshift_health_checker/test/docker_storage_test.py127
-rw-r--r--roles/openshift_health_checker/test/elasticsearch_test.py26
-rw-r--r--roles/openshift_health_checker/test/etcd_imagedata_size_test.py20
-rw-r--r--roles/openshift_health_checker/test/etcd_traffic_test.py74
-rw-r--r--roles/openshift_health_checker/test/etcd_volume_test.py9
-rw-r--r--roles/openshift_health_checker/test/fluentd_test.py4
-rw-r--r--roles/openshift_health_checker/test/kibana_test.py18
-rw-r--r--roles/openshift_health_checker/test/logging_check_test.py46
-rw-r--r--roles/openshift_health_checker/test/logging_index_time_test.py170
-rw-r--r--roles/openshift_health_checker/test/memory_availability_test.py8
-rw-r--r--roles/openshift_health_checker/test/mixins_test.py4
-rw-r--r--roles/openshift_health_checker/test/openshift_check_test.py43
-rw-r--r--roles/openshift_health_checker/test/ovs_version_test.py17
-rw-r--r--roles/openshift_health_checker/test/package_availability_test.py7
-rw-r--r--roles/openshift_health_checker/test/package_update_test.py5
-rw-r--r--roles/openshift_health_checker/test/package_version_test.py56
-rw-r--r--roles/openshift_health_checker/test/search_journalctl_test.py157
46 files changed, 1840 insertions, 719 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index a62e4331e..581dd7d15 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -37,15 +37,14 @@ class ActionModule(ActionBase):
return result
try:
- known_checks = self.load_known_checks()
+ known_checks = self.load_known_checks(tmp, task_vars)
+ args = self._task.args
+ resolved_checks = resolve_checks(args.get("checks", []), known_checks.values())
except OpenShiftCheckException as e:
result["failed"] = True
result["msg"] = str(e)
return result
- args = self._task.args
- resolved_checks = resolve_checks(args.get("checks", []), known_checks.values())
-
result["checks"] = check_results = {}
user_disabled_checks = [
@@ -57,13 +56,13 @@ class ActionModule(ActionBase):
display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"]))
check = known_checks[check_name]
- if not check.is_active(task_vars):
+ if not check.is_active():
r = dict(skipped=True, skipped_reason="Not active for this host")
elif check_name in user_disabled_checks:
r = dict(skipped=True, skipped_reason="Disabled by user request")
else:
try:
- r = check.run(tmp, task_vars)
+ r = check.run()
except OpenShiftCheckException as e:
r = dict(
failed=True,
@@ -79,7 +78,7 @@ class ActionModule(ActionBase):
result["changed"] = any(r.get("changed", False) for r in check_results.values())
return result
- def load_known_checks(self):
+ def load_known_checks(self, tmp, task_vars):
load_checks()
known_checks = {}
@@ -92,7 +91,7 @@ class ActionModule(ActionBase):
check_name,
cls.__module__, cls.__name__,
other_cls.__module__, other_cls.__name__))
- known_checks[check_name] = cls(execute_module=self._execute_module)
+ known_checks[check_name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars)
return known_checks
diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
index 64c29a8d9..d10200719 100644
--- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
+++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
@@ -1,6 +1,6 @@
-'''
-Ansible callback plugin.
-'''
+"""
+Ansible callback plugin to give a nicely formatted summary of failures.
+"""
# Reason: In several locations below we disable pylint protected-access
# for Ansible objects that do not give us any public way
@@ -16,11 +16,11 @@ from ansible.utils.color import stringc
class CallbackModule(CallbackBase):
- '''
+ """
This callback plugin stores task results and summarizes failures.
The file name is prefixed with `zz_` to make this plugin be loaded last by
Ansible, thus making its output the last thing that users see.
- '''
+ """
CALLBACK_VERSION = 2.0
CALLBACK_TYPE = 'aggregate'
@@ -39,7 +39,8 @@ class CallbackModule(CallbackBase):
def v2_runner_on_failed(self, result, ignore_errors=False):
super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors)
- self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
+ if not ignore_errors:
+ self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
def v2_playbook_on_stats(self, stats):
super(CallbackModule, self).v2_playbook_on_stats(stats)
@@ -47,7 +48,7 @@ class CallbackModule(CallbackBase):
self._print_failure_details(self.__failures)
def _print_failure_details(self, failures):
- '''Print a summary of failed tasks or checks.'''
+ """Print a summary of failed tasks or checks."""
self._display.display(u'\nFailure summary:\n')
width = len(str(len(failures)))
@@ -68,7 +69,9 @@ class CallbackModule(CallbackBase):
playbook_context = None
# re: result attrs see top comment # pylint: disable=protected-access
for failure in failures:
- # get context from check task result since callback plugins cannot access task vars
+ # Get context from check task result since callback plugins cannot access task vars.
+ # NOTE: thus context is not known unless checks run. Failures prior to checks running
+ # don't have playbook_context in the results. But we only use it now when checks fail.
playbook_context = playbook_context or failure['result']._result.get('playbook_context')
failed_checks.update(
name
@@ -80,8 +83,11 @@ class CallbackModule(CallbackBase):
def _print_check_failure_summary(self, failed_checks, context):
checks = ','.join(sorted(failed_checks))
- # NOTE: context is not set if all failures occurred prior to checks task
- summary = (
+ # The purpose of specifying context is to vary the output depending on what the user was
+ # expecting to happen (based on which playbook they ran). The only use currently is to
+ # vary the message depending on whether the user was deliberately running checks or was
+ # trying to install/upgrade and checks are just included. Other use cases may arise.
+ summary = ( # default to explaining what checks are in the first place
'\n'
'The execution of "{playbook}"\n'
'includes checks designed to fail early if the requirements\n'
@@ -93,27 +99,26 @@ class CallbackModule(CallbackBase):
'Some checks may be configurable by variables if your requirements\n'
'are different from the defaults; consult check documentation.\n'
'Variables can be set in the inventory or passed on the\n'
- 'command line using the -e flag to ansible-playbook.\n'
+ 'command line using the -e flag to ansible-playbook.\n\n'
).format(playbook=self._playbook_file, checks=checks)
if context in ['pre-install', 'health']:
- summary = (
+ summary = ( # user was expecting to run checks, less explanation needed
'\n'
'You may choose to configure or disable failing checks by\n'
'setting Ansible variables. To disable those above:\n\n'
' openshift_disable_check={checks}\n\n'
'Consult check documentation for configurable variables.\n'
'Variables can be set in the inventory or passed on the\n'
- 'command line using the -e flag to ansible-playbook.\n'
+ 'command line using the -e flag to ansible-playbook.\n\n'
).format(checks=checks)
- # other expected contexts: install, upgrade
self._display.display(summary)
# re: result attrs see top comment # pylint: disable=protected-access
def _format_failure(failure):
- '''Return a list of pretty-formatted text entries describing a failure, including
+ """Return a list of pretty-formatted text entries describing a failure, including
relevant information about it. Expect that the list of text entries will be joined
- by a newline separator when output to the user.'''
+ by a newline separator when output to the user."""
result = failure['result']
host = result._host.get_name()
play = _get_play(result._task)
@@ -134,7 +139,7 @@ def _format_failure(failure):
def _format_failed_checks(checks):
- '''Return pretty-formatted text describing checks that failed.'''
+ """Return pretty-formatted text describing checks that failed."""
failed_check_msgs = []
for check, body in checks.items():
if body.get('failed', False): # only show the failed checks
@@ -149,7 +154,7 @@ def _format_failed_checks(checks):
# This is inspired by ansible.playbook.base.Base.dump_me.
# re: play/task/block attrs see top comment # pylint: disable=protected-access
def _get_play(obj):
- '''Given a task or block, recursively tries to find its parent play.'''
+ """Given a task or block, recursively try to find its parent play."""
if hasattr(obj, '_play'):
return obj._play
if getattr(obj, '_parent'):
diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py
index 4c205e48c..f9babebb9 100755..100644
--- a/roles/openshift_health_checker/library/aos_version.py
+++ b/roles/openshift_health_checker/library/aos_version.py
@@ -1,5 +1,5 @@
#!/usr/bin/python
-'''
+"""
Ansible module for yum-based systems determining if multiple releases
of an OpenShift package are available, and if the release requested
(if any) is available down to the given precision.
@@ -16,9 +16,13 @@ of release availability already. Without duplicating all that, we would
like the user to have a helpful error message if we detect things will
not work out right. Note that if openshift_release is not specified in
the inventory, the version comparison checks just pass.
-'''
+"""
from ansible.module_utils.basic import AnsibleModule
+# NOTE: because of the dependency on yum (Python 2-only), this module does not
+# work under Python 3. But since we run unit tests against both Python 2 and
+# Python 3, we use six for cross compatibility in this module alone:
+from ansible.module_utils.six import string_types
IMPORT_EXCEPTION = None
try:
@@ -28,7 +32,7 @@ except ImportError as err:
class AosVersionException(Exception):
- '''Base exception class for package version problems'''
+ """Base exception class for package version problems"""
def __init__(self, message, problem_pkgs=None):
Exception.__init__(self, message)
self.problem_pkgs = problem_pkgs
@@ -122,12 +126,15 @@ def _check_precise_version_found(pkgs, expected_pkgs_dict):
for pkg in pkgs:
if pkg.name not in expected_pkgs_dict:
continue
- # does the version match, to the precision requested?
- # and, is it strictly greater, at the precision requested?
- expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
- match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
- if match_version == expected_pkg_version:
- pkgs_precise_version_found.add(pkg.name)
+ expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"]
+ if isinstance(expected_pkg_versions, string_types):
+ expected_pkg_versions = [expected_pkg_versions]
+ for expected_pkg_version in expected_pkg_versions:
+ # does the version match, to the precision requested?
+ # and, is it strictly greater, at the precision requested?
+ match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
+ if match_version == expected_pkg_version:
+ pkgs_precise_version_found.add(pkg.name)
not_found = []
for name, pkg in expected_pkgs_dict.items():
@@ -157,8 +164,13 @@ def _check_higher_version_found(pkgs, expected_pkgs_dict):
for pkg in pkgs:
if pkg.name not in expected_pkg_names:
continue
- expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
- req_release_arr = [int(segment) for segment in expected_pkg_version.split(".")]
+ expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"]
+ if isinstance(expected_pkg_versions, string_types):
+ expected_pkg_versions = [expected_pkg_versions]
+ # NOTE: the list of versions is assumed to be sorted so that the highest
+ # desirable version is the last.
+ highest_desirable_version = expected_pkg_versions[-1]
+ req_release_arr = [int(segment) for segment in highest_desirable_version.split(".")]
version = [int(segment) for segment in pkg.version.split(".")]
too_high = version[:len(req_release_arr)] > req_release_arr
higher_than_seen = version > higher_version_for_pkg.get(pkg.name, [])
diff --git a/roles/openshift_health_checker/library/check_yum_update.py b/roles/openshift_health_checker/library/check_yum_update.py
index 433795b67..433795b67 100755..100644
--- a/roles/openshift_health_checker/library/check_yum_update.py
+++ b/roles/openshift_health_checker/library/check_yum_update.py
diff --git a/roles/openshift_health_checker/library/docker_info.py b/roles/openshift_health_checker/library/docker_info.py
index 7f712bcff..0d0ddae8b 100644
--- a/roles/openshift_health_checker/library/docker_info.py
+++ b/roles/openshift_health_checker/library/docker_info.py
@@ -1,4 +1,3 @@
-# pylint: disable=missing-docstring
"""
Ansible module for determining information about the docker host.
@@ -13,6 +12,7 @@ from ansible.module_utils.docker_common import AnsibleDockerClient
def main():
+ """Entrypoint for running an Ansible module."""
client = AnsibleDockerClient()
client.module.exit_json(
diff --git a/roles/openshift_health_checker/library/search_journalctl.py b/roles/openshift_health_checker/library/search_journalctl.py
new file mode 100644
index 000000000..3631f71c8
--- /dev/null
+++ b/roles/openshift_health_checker/library/search_journalctl.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+"""Interface to journalctl."""
+
+from time import time
+import json
+import re
+import subprocess
+
+from ansible.module_utils.basic import AnsibleModule
+
+
+class InvalidMatcherRegexp(Exception):
+ """Exception class for invalid matcher regexp."""
+ pass
+
+
+class InvalidLogEntry(Exception):
+ """Exception class for invalid / non-json log entries."""
+ pass
+
+
+class LogInputSubprocessError(Exception):
+ """Exception class for errors that occur while executing a subprocess."""
+ pass
+
+
+def main():
+ """Scan a given list of "log_matchers" for journalctl messages containing given patterns.
+ "log_matchers" is a list of dicts consisting of three keys that help fine-tune log searching:
+ 'start_regexp', 'regexp', and 'unit'.
+
+ Sample "log_matchers" list:
+
+ [
+ {
+ 'start_regexp': r'Beginning of systemd unit',
+ 'regexp': r'the specific log message to find',
+ 'unit': 'etcd',
+ }
+ ]
+ """
+ module = AnsibleModule(
+ argument_spec=dict(
+ log_count_limit=dict(type="int", default=500),
+ log_matchers=dict(type="list", required=True),
+ ),
+ )
+
+ timestamp_limit_seconds = time() - 60 * 60 # 1 hour
+
+ log_count_limit = module.params["log_count_limit"]
+ log_matchers = module.params["log_matchers"]
+
+ matched_regexp, errors = get_log_matches(log_matchers, log_count_limit, timestamp_limit_seconds)
+
+ module.exit_json(
+ changed=False,
+ failed=bool(errors),
+ errors=errors,
+ matched=matched_regexp,
+ )
+
+
+def get_log_matches(matchers, log_count_limit, timestamp_limit_seconds):
+ """Return (matched regexps, errors), scanning up to log_count_limit log entries for each matcher.
+
+ Log entries are only considered if newer than timestamp_limit_seconds.
+ """
+ matched_regexp = []
+ errors = []
+
+ for matcher in matchers:
+ try:
+ log_output = get_log_output(matcher)
+ except LogInputSubprocessError as err:
+ errors.append(str(err))
+ continue
+
+ try:
+ matched = find_matches(log_output, matcher, log_count_limit, timestamp_limit_seconds)
+ if matched:
+ matched_regexp.append(matcher.get("regexp", ""))
+ except InvalidMatcherRegexp as err:
+ errors.append(str(err))
+ except InvalidLogEntry as err:
+ errors.append(str(err))
+
+ return matched_regexp, errors
+
+
+def get_log_output(matcher):
+ """Return an iterator on the logs of a given matcher."""
+ try:
+ cmd_output = subprocess.Popen(list([
+ '/bin/journalctl',
+ '-ru', matcher.get("unit", ""),
+ '--output', 'json',
+ ]), stdout=subprocess.PIPE)
+
+ return iter(cmd_output.stdout.readline, '')
+
+ except subprocess.CalledProcessError as exc:
+ msg = "Could not obtain journalctl logs for the specified systemd unit: {}: {}"
+ raise LogInputSubprocessError(msg.format(matcher.get("unit", "<missing>"), str(exc)))
+ except OSError as exc:
+ raise LogInputSubprocessError(str(exc))
+
+
+def find_matches(log_output, matcher, log_count_limit, timestamp_limit_seconds):
+ """Return the first line of iterable log_output matched by a given matcher, or None if none match.
+
+ Ignore any log_output items older than timestamp_limit_seconds.
+ """
+ try:
+ regexp = re.compile(matcher.get("regexp", ""))
+ start_regexp = re.compile(matcher.get("start_regexp", ""))
+ except re.error as err:
+ msg = "A log matcher object was provided with an invalid regular expression: {}"
+ raise InvalidMatcherRegexp(msg.format(str(err)))
+
+ matched = None
+
+ for log_count, line in enumerate(log_output):
+ if log_count >= log_count_limit:
+ break
+
+ try:
+ obj = json.loads(line)
+
+ # don't need to look past the most recent service restart
+ if start_regexp.match(obj["MESSAGE"]):
+ break
+
+ log_timestamp_seconds = float(obj["__REALTIME_TIMESTAMP"]) / 1000000
+ if log_timestamp_seconds < timestamp_limit_seconds:
+ break
+
+ if regexp.match(obj["MESSAGE"]):
+ matched = line
+ break
+
+ except ValueError:
+ msg = "Log entry for systemd unit {} contained invalid json syntax: {}"
+ raise InvalidLogEntry(msg.format(matcher.get("unit"), line))
+
+ return matched
+
+
+if __name__ == '__main__':
+ main()
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 5c9949ced..40a28cde5 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -19,15 +19,21 @@ class OpenShiftCheckException(Exception):
@six.add_metaclass(ABCMeta)
class OpenShiftCheck(object):
- """A base class for defining checks for an OpenShift cluster environment."""
+ """
+ A base class for defining checks for an OpenShift cluster environment.
+
+ Expect optional params: method execute_module, dict task_vars, and string tmp.
+ execute_module is expected to have a signature compatible with _execute_module
+ from ansible plugins/action/__init__.py, e.g.:
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *args):
+ This is stored so that it can be invoked in subclasses via check.execute_module("name", args)
+ which provides the check's stored task_vars and tmp.
+ """
- def __init__(self, execute_module=None, module_executor=None):
- if execute_module is module_executor is None:
- raise TypeError(
- "__init__() takes either execute_module (recommended) "
- "or module_executor (deprecated), none given")
- self.execute_module = execute_module or module_executor
- self.module_executor = self.execute_module
+ def __init__(self, execute_module=None, task_vars=None, tmp=None):
+ self._execute_module = execute_module
+ self.task_vars = task_vars or {}
+ self.tmp = tmp
@abstractproperty
def name(self):
@@ -43,13 +49,13 @@ class OpenShiftCheck(object):
"""
return []
- @classmethod
- def is_active(cls, task_vars): # pylint: disable=unused-argument
+ @staticmethod
+ def is_active():
"""Returns true if this check applies to the ansible-playbook run."""
return True
@abstractmethod
- def run(self, tmp, task_vars):
+ def run(self):
"""Executes a check, normally implemented as a module."""
return {}
@@ -62,6 +68,43 @@ class OpenShiftCheck(object):
for subclass in subclass.subclasses():
yield subclass
+ def execute_module(self, module_name=None, module_args=None):
+ """Invoke an Ansible module from a check.
+
+ Invoke stored _execute_module, normally copied from the action
+ plugin, with its params and the task_vars and tmp given at
+ check initialization. No positional parameters beyond these
+ are specified. If it's necessary to specify any of the other
+ parameters to _execute_module then that should just be invoked
+ directly (with awareness of changes in method signature per
+ Ansible version).
+
+ So e.g. check.execute_module("foo", dict(arg1=...))
+ Return: result hash from module execution.
+ """
+ if self._execute_module is None:
+ raise NotImplementedError(
+ self.__class__.__name__ +
+ " invoked execute_module without providing the method at initialization."
+ )
+ return self._execute_module(module_name, module_args, self.tmp, self.task_vars)
+
+ def get_var(self, *keys, **kwargs):
+ """Get deeply nested values from task_vars.
+
+ Ansible task_vars structures are Python dicts, often mapping strings to
+ other dicts. This helper makes it easier to get a nested value, raising
+ OpenShiftCheckException when a key is not found or returning a default value
+ provided as a keyword argument.
+ """
+ try:
+ value = reduce(operator.getitem, keys, self.task_vars)
+ except (KeyError, TypeError):
+ if "default" in kwargs:
+ return kwargs["default"]
+ raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys))))
+ return value
+
LOADER_EXCLUDES = (
"__init__.py",
@@ -86,20 +129,3 @@ def load_checks(path=None, subpkg=""):
modules.append(import_module(__package__ + subpkg + "." + name[:-3]))
return modules
-
-
-def get_var(task_vars, *keys, **kwargs):
- """Helper function to get deeply nested values from task_vars.
-
- Ansible task_vars structures are Python dicts, often mapping strings to
- other dicts. This helper makes it easier to get a nested value, raising
- OpenShiftCheckException when a key is not found or returning a default value
- provided as a keyword argument.
- """
- try:
- value = reduce(operator.getitem, keys, task_vars)
- except (KeyError, TypeError):
- if "default" in kwargs:
- return kwargs["default"]
- raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys))))
- return value
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 962148cb8..283461294 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -1,9 +1,12 @@
-# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
-from openshift_checks.mixins import NotContainerizedMixin
+"""Check that there is enough disk space in predefined paths."""
+import os.path
+import tempfile
-class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+
+
+class DiskAvailability(OpenShiftCheck):
"""Check that recommended disk space is available before a first-time install."""
name = "disk_availability"
@@ -12,56 +15,100 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
# Values taken from the official installation documentation:
# https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements
recommended_disk_space_bytes = {
- "masters": 40 * 10**9,
- "nodes": 15 * 10**9,
- "etcd": 20 * 10**9,
+ '/var': {
+ 'masters': 40 * 10**9,
+ 'nodes': 15 * 10**9,
+ 'etcd': 20 * 10**9,
+ },
+ # Used to copy client binaries into,
+ # see roles/openshift_cli/library/openshift_container_binary_sync.py.
+ '/usr/local/bin': {
+ 'masters': 1 * 10**9,
+ 'nodes': 1 * 10**9,
+ 'etcd': 1 * 10**9,
+ },
+ # Used as temporary storage in several cases.
+ tempfile.gettempdir(): {
+ 'masters': 1 * 10**9,
+ 'nodes': 1 * 10**9,
+ 'etcd': 1 * 10**9,
+ },
}
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Skip hosts that do not have recommended disk space requirements."""
- group_names = get_var(task_vars, "group_names", default=[])
- has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes))
- return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation
-
- def run(self, tmp, task_vars):
- group_names = get_var(task_vars, "group_names")
- ansible_mounts = get_var(task_vars, "ansible_mounts")
- free_bytes = self.openshift_available_disk(ansible_mounts)
-
- recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names)
- configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9
- min_free_bytes = configured_min or recommended_min
-
- if free_bytes < min_free_bytes:
- return {
- 'failed': True,
- 'msg': (
- 'Available disk space ({:.1f} GB) for the volume containing '
- '"/var" is below minimum recommended space ({:.1f} GB)'
- ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9)
+ group_names = self.get_var("group_names", default=[])
+ active_groups = set()
+ for recommendation in self.recommended_disk_space_bytes.values():
+ active_groups.update(recommendation.keys())
+ has_disk_space_recommendation = bool(active_groups.intersection(group_names))
+ return super(DiskAvailability, self).is_active() and has_disk_space_recommendation
+
+ def run(self):
+ group_names = self.get_var("group_names")
+ ansible_mounts = self.get_var("ansible_mounts")
+ ansible_mounts = {mount['mount']: mount for mount in ansible_mounts}
+
+ user_config = self.get_var("openshift_check_min_host_disk_gb", default={})
+ try:
+ # For backwards-compatibility, if openshift_check_min_host_disk_gb
+ # is a number, then it overrides the required config for '/var'.
+ number = float(user_config)
+ user_config = {
+ '/var': {
+ 'masters': number,
+ 'nodes': number,
+ 'etcd': number,
+ },
}
+ except TypeError:
+ # If it is not a number, then it should be a nested dict.
+ pass
+
+ # TODO: as suggested in
+ # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021,
+ # maybe we could support checking disk availability in paths that are
+ # not part of the official recommendation but present in the user
+ # configuration.
+ for path, recommendation in self.recommended_disk_space_bytes.items():
+ free_bytes = self.free_bytes(path, ansible_mounts)
+ recommended_bytes = max(recommendation.get(name, 0) for name in group_names)
+
+ config = user_config.get(path, {})
+ # NOTE: the user config is in GB, but we compare bytes, thus the
+ # conversion.
+ config_bytes = max(config.get(name, 0) for name in group_names) * 10**9
+ recommended_bytes = config_bytes or recommended_bytes
+
+ if free_bytes < recommended_bytes:
+ free_gb = float(free_bytes) / 10**9
+ recommended_gb = float(recommended_bytes) / 10**9
+ return {
+ 'failed': True,
+ 'msg': (
+ 'Available disk space in "{}" ({:.1f} GB) '
+ 'is below minimum recommended ({:.1f} GB)'
+ ).format(path, free_gb, recommended_gb)
+ }
return {}
@staticmethod
- def openshift_available_disk(ansible_mounts):
- """Determine the available disk space for an OpenShift installation.
-
- ansible_mounts should be a list of dicts like the 'setup' Ansible module
- returns.
- """
- # priority list in descending order
- supported_mnt_paths = ["/var", "/"]
- available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
+ def free_bytes(path, ansible_mounts):
+ """Return the size available in path based on ansible_mounts."""
+ mount_point = path
+ # arbitrary value to prevent an infinite loop, in the unlikely case that '/'
+ # is not in ansible_mounts.
+ max_depth = 32
+ while mount_point not in ansible_mounts and max_depth > 0:
+ mount_point = os.path.dirname(mount_point)
+ max_depth -= 1
try:
- for path in supported_mnt_paths:
- if path in available_mnts:
- return available_mnts[path]["size_available"]
+ free_bytes = ansible_mounts[mount_point]['size_available']
except KeyError:
- pass
+ known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none'
+ msg = 'Unable to determine disk availability for "{}". Known mount points: {}.'
+ raise OpenShiftCheckException(msg.format(path, known_mounts))
- paths = ''.join(sorted(available_mnts)) or 'none'
- msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths)
- raise OpenShiftCheckException(msg)
+ return free_bytes
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 27e6fe383..77180223e 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -1,42 +1,47 @@
-# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, get_var
+"""Check that required Docker images are available."""
+
+from openshift_checks import OpenShiftCheck
from openshift_checks.mixins import DockerHostMixin
+NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"]
+DEPLOYMENT_IMAGE_INFO = {
+ "origin": {
+ "namespace": "openshift",
+ "name": "origin",
+ "registry_console_image": "cockpit/kubernetes",
+ },
+ "openshift-enterprise": {
+ "namespace": "openshift3",
+ "name": "ose",
+ "registry_console_image": "registry.access.redhat.com/openshift3/registry-console",
+ },
+}
+
+
class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"""Check that required Docker images are available.
- This check attempts to ensure that required docker images are
- either present locally, or able to be pulled down from available
- registries defined in a host machine.
+ Determine docker images that an install would require and check that they
+ are either present in the host's docker index, or available for the host to pull
+ with known registries as defined in our inventory file (or defaults).
"""
name = "docker_image_availability"
tags = ["preflight"]
+ # we use python-docker-py to check local docker for images, and skopeo
+ # to look for images available remotely without waiting to pull them.
+ dependencies = ["python-docker-py", "skopeo"]
- dependencies = ["skopeo", "python-docker-py"]
-
- deployment_image_info = {
- "origin": {
- "namespace": "openshift",
- "name": "origin",
- },
- "openshift-enterprise": {
- "namespace": "openshift3",
- "name": "ose",
- },
- }
-
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Skip hosts with unsupported deployment types."""
- deployment_type = get_var(task_vars, "openshift_deployment_type")
- has_valid_deployment_type = deployment_type in cls.deployment_image_info
+ deployment_type = self.get_var("openshift_deployment_type")
+ has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO
- return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type
+ return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type
- def run(self, tmp, task_vars):
- msg, failed, changed = self.ensure_dependencies(task_vars)
+ def run(self):
+ msg, failed, changed = self.ensure_dependencies()
if failed:
return {
"failed": True,
@@ -44,18 +49,18 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"msg": "Some dependencies are required in order to check Docker image availability.\n" + msg
}
- required_images = self.required_images(task_vars)
- missing_images = set(required_images) - set(self.local_images(required_images, task_vars))
+ required_images = self.required_images()
+ missing_images = set(required_images) - set(self.local_images(required_images))
# exit early if all images were found locally
if not missing_images:
return {"changed": changed}
- registries = self.known_docker_registries(task_vars)
+ registries = self.known_docker_registries()
if not registries:
return {"failed": True, "msg": "Unable to retrieve any docker registries.", "changed": changed}
- available_images = self.available_images(missing_images, registries, task_vars)
+ available_images = self.available_images(missing_images, registries)
unavailable_images = set(missing_images) - set(available_images)
if unavailable_images:
@@ -70,72 +75,77 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
return {"changed": changed}
- def required_images(self, task_vars):
- deployment_type = get_var(task_vars, "openshift_deployment_type")
- image_info = self.deployment_image_info[deployment_type]
-
- openshift_release = get_var(task_vars, "openshift_release", default="latest")
- openshift_image_tag = get_var(task_vars, "openshift_image_tag")
- is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
-
- images = set(self.required_docker_images(
- image_info["namespace"],
- image_info["name"],
- ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names
- openshift_release,
- is_containerized,
- ))
-
- # append images with qualified image tags to our list of required images.
- # these are images with a (v0.0.0.0) tag, rather than a standard release
- # format tag (v0.0). We want to check this set in both containerized and
- # non-containerized installations.
- images.update(
- self.required_qualified_docker_images(
- image_info["namespace"],
- image_info["name"],
- openshift_image_tag,
- ),
- )
-
- return images
-
- @staticmethod
- def required_docker_images(namespace, name, additional_image_names, version, is_containerized):
- if is_containerized:
- return ["{}/{}:{}".format(namespace, name, version)] if name else []
-
- # include additional non-containerized images specific to the current deployment type
- return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names]
-
- @staticmethod
- def required_qualified_docker_images(namespace, name, version):
- # pylint: disable=invalid-name
- return [
- "{}/{}-{}:{}".format(namespace, name, suffix, version)
- for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"]
- ]
-
- def local_images(self, images, task_vars):
+ def required_images(self):
+ """
+ Determine which images we expect to need for this host.
+ Returns: a set of required images like 'openshift/origin:v3.6'
+
+ The thorny issue of determining the image names from the variables is under consideration
+ via https://github.com/openshift/openshift-ansible/issues/4415
+
+ For now we operate as follows:
+ * For containerized components (master, node, ...) we look at the deployment type and
+ use openshift/origin or openshift3/ose as the base for those component images. The
+ version is openshift_image_tag as determined by the openshift_version role.
+ * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
+ it is defined; otherwise we again use the base that depends on the deployment type.
+ Registry is not included in constructed images. It may be in oreg_url or etcd image.
+ """
+ required = set()
+ deployment_type = self.get_var("openshift_deployment_type")
+ host_groups = self.get_var("group_names")
+ # containerized etcd may not have openshift_image_tag, see bz 1466622
+ image_tag = self.get_var("openshift_image_tag", default="latest")
+ image_info = DEPLOYMENT_IMAGE_INFO[deployment_type]
+ if not image_info:
+ return required
+
+ # template for images that run on top of OpenShift
+ image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
+ image_url = self.get_var("oreg_url", default="") or image_url
+ if 'nodes' in host_groups:
+ for suffix in NODE_IMAGE_SUFFIXES:
+ required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag))
+ # The registry-console is for some reason not prefixed with ose- like the other components.
+ # Nor is it versioned the same, so just look for latest.
+ # Also a completely different name is used for Origin.
+ required.add(image_info["registry_console_image"])
+
+ # images for containerized components
+ if self.get_var("openshift", "common", "is_containerized"):
+ components = set()
+ if 'nodes' in host_groups:
+ components.update(["node", "openvswitch"])
+ if 'masters' in host_groups: # name is "origin" or "ose"
+ components.add(image_info["name"])
+ for component in components:
+ required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
+ if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise
+ required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag
+
+ return required
+
+ def local_images(self, images):
"""Filter a list of images and return those available locally."""
return [
image for image in images
- if self.is_image_local(image, task_vars)
+ if self.is_image_local(image)
]
- def is_image_local(self, image, task_vars):
- result = self.module_executor("docker_image_facts", {"name": image}, task_vars)
+ def is_image_local(self, image):
+ """Check if image is already in local docker index."""
+ result = self.execute_module("docker_image_facts", {"name": image})
if result.get("failed", False):
return False
return bool(result.get("images", []))
- @staticmethod
- def known_docker_registries(task_vars):
- docker_facts = get_var(task_vars, "openshift", "docker")
+ def known_docker_registries(self):
+ """Build a list of docker registries available according to inventory vars."""
+ docker_facts = self.get_var("openshift", "docker")
regs = set(docker_facts["additional_registries"])
- deployment_type = get_var(task_vars, "openshift_deployment_type")
+ deployment_type = self.get_var("openshift_deployment_type")
if deployment_type == "origin":
regs.update(["docker.io"])
elif "enterprise" in deployment_type:
@@ -143,21 +153,26 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
return list(regs)
- def available_images(self, images, registries, task_vars):
- """Inspect existing images using Skopeo and return all images successfully inspected."""
+ def available_images(self, images, default_registries):
+ """Search remotely for images. Returns: list of images found."""
return [
image for image in images
- if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries)
+ if self.is_available_skopeo_image(image, default_registries)
]
- def is_available_skopeo_image(self, image, registry, task_vars):
- """Uses Skopeo to determine if required image exists in a given registry."""
+ def is_available_skopeo_image(self, image, default_registries):
+ """Use Skopeo to determine if required image exists in known registry(s)."""
+ registries = default_registries
+
+ # if image already includes a registry, only use that
+ if image.count("/") > 1:
+ registry, image = image.split("/", 1)
+ registries = [registry]
- cmd_str = "skopeo inspect docker://{registry}/{image}".format(
- registry=registry,
- image=image,
- )
+ for registry in registries:
+ args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)}
+ result = self.execute_module("command", args)
+ if result.get("rc", 0) == 0 and not result.get("failed"):
+ return True
- args = {"_raw_params": cmd_str}
- result = self.module_executor("command", args, task_vars)
- return not result.get("failed", False) and result.get("rc", 0) == 0
+ return False
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
index 7f1751b36..dea15a56e 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_storage.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -1,7 +1,8 @@
"""Check Docker storage driver and usage."""
import json
+import os.path
import re
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
from openshift_checks.mixins import DockerHostMixin
@@ -17,15 +18,32 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
tags = ["pre-install", "health", "preflight"]
dependencies = ["python-docker-py"]
- storage_drivers = ["devicemapper", "overlay2"]
+ storage_drivers = ["devicemapper", "overlay", "overlay2"]
max_thinpool_data_usage_percent = 90.0
max_thinpool_meta_usage_percent = 90.0
+ max_overlay_usage_percent = 90.0
- # pylint: disable=too-many-return-statements
- # Reason: permanent stylistic exception;
- # it is clearer to return on failures and there are just many ways to fail here.
- def run(self, tmp, task_vars):
- msg, failed, changed = self.ensure_dependencies(task_vars)
+ # TODO(lmeyer): mention these in the output when check fails
+ configuration_variables = [
+ (
+ "max_thinpool_data_usage_percent",
+ "For 'devicemapper' storage driver, usage threshold percentage for data. "
+ "Format: float. Default: {:.1f}".format(max_thinpool_data_usage_percent),
+ ),
+ (
+ "max_thinpool_meta_usage_percent",
+ "For 'devicemapper' storage driver, usage threshold percentage for metadata. "
+ "Format: float. Default: {:.1f}".format(max_thinpool_meta_usage_percent),
+ ),
+ (
+ "max_overlay_usage_percent",
+ "For 'overlay' or 'overlay2' storage driver, usage threshold percentage. "
+ "Format: float. Default: {:.1f}".format(max_overlay_usage_percent),
+ ),
+ ]
+
+ def run(self):
+ msg, failed, changed = self.ensure_dependencies()
if failed:
return {
"failed": True,
@@ -34,17 +52,17 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
}
# attempt to get the docker info hash from the API
- info = self.execute_module("docker_info", {}, task_vars)
- if info.get("failed"):
+ docker_info = self.execute_module("docker_info", {})
+ if docker_info.get("failed"):
return {"failed": True, "changed": changed,
"msg": "Failed to query Docker API. Is docker running on this host?"}
- if not info.get("info"): # this would be very strange
+ if not docker_info.get("info"): # this would be very strange
return {"failed": True, "changed": changed,
- "msg": "Docker API query missing info:\n{}".format(json.dumps(info))}
- info = info["info"]
+ "msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))}
+ docker_info = docker_info["info"]
# check if the storage driver we saw is valid
- driver = info.get("Driver", "[NONE]")
+ driver = docker_info.get("Driver", "[NONE]")
if driver not in self.storage_drivers:
msg = (
"Detected unsupported Docker storage driver '{driver}'.\n"
@@ -53,26 +71,34 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
return {"failed": True, "changed": changed, "msg": msg}
# driver status info is a list of tuples; convert to dict and validate based on driver
- driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])}
+ driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])}
+
+ result = {}
+
if driver == "devicemapper":
- if driver_status.get("Data loop file"):
- msg = (
- "Use of loopback devices with the Docker devicemapper storage driver\n"
- "(the default storage configuration) is unsupported in production.\n"
- "Please use docker-storage-setup to configure a backing storage volume.\n"
- "See http://red.ht/2rNperO for further information."
- )
- return {"failed": True, "changed": changed, "msg": msg}
- result = self._check_dm_usage(driver_status, task_vars)
- result['changed'] = result.get('changed', False) or changed
- return result
+ result = self.check_devicemapper_support(driver_status)
- # TODO(lmeyer): determine how to check usage for overlay2
+ if driver in ['overlay', 'overlay2']:
+ result = self.check_overlay_support(docker_info, driver_status)
- return {"changed": changed}
+ result['changed'] = result.get('changed', False) or changed
+ return result
- def _check_dm_usage(self, driver_status, task_vars):
- """
+ def check_devicemapper_support(self, driver_status):
+ """Check if dm storage driver is supported as configured. Return: result dict."""
+ if driver_status.get("Data loop file"):
+ msg = (
+ "Use of loopback devices with the Docker devicemapper storage driver\n"
+ "(the default storage configuration) is unsupported in production.\n"
+ "Please use docker-storage-setup to configure a backing storage volume.\n"
+ "See http://red.ht/2rNperO for further information."
+ )
+ return {"failed": True, "msg": msg}
+ result = self.check_dm_usage(driver_status)
+ return result
+
+ def check_dm_usage(self, driver_status):
+ """Check usage thresholds for Docker dm storage driver. Return: result dict.
Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
devicemapper storage. The LV is "thin" because it does not use all available storage
@@ -83,7 +109,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
could run out of space first; so we check both.
"""
vals = dict(
- vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars),
+ vg_free=self.get_vg_free(driver_status.get("Pool Name")),
data_used=driver_status.get("Data Space Used"),
data_total=driver_status.get("Data Space Total"),
metadata_used=driver_status.get("Metadata Space Used"),
@@ -93,7 +119,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
# convert all human-readable strings to bytes
for key, value in vals.copy().items():
try:
- vals[key + "_bytes"] = self._convert_to_bytes(value)
+ vals[key + "_bytes"] = self.convert_to_bytes(value)
except ValueError as err: # unlikely to hit this from API info, but just to be safe
return {
"failed": True,
@@ -104,7 +130,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
# determine the threshold percentages which usage should not exceed
for name, default in [("data", self.max_thinpool_data_usage_percent),
("metadata", self.max_thinpool_meta_usage_percent)]:
- percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default)
+ percent = self.get_var("max_thinpool_" + name + "_usage_percent", default=default)
try:
vals[name + "_threshold"] = float(percent)
except ValueError:
@@ -131,10 +157,12 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
return vals
- def _get_vg_free(self, pool, task_vars):
- # Determine which VG to examine according to the pool name, the only indicator currently
- # available from the Docker API driver info. We assume a name that looks like
- # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen.
+ def get_vg_free(self, pool):
+ """Determine which VG to examine according to the pool name. Return: size vgs reports.
+ Pool name is the only indicator currently available from the Docker API driver info.
+ We assume a name that looks like "vg--name-docker--pool";
+ vg and lv names with inner hyphens doubled, joined by a hyphen.
+ """
match = re.match(r'((?:[^-]|--)+)-(?!-)', pool) # matches up to the first single hyphen
if not match: # unlikely, but... be clear if we assumed wrong
raise OpenShiftCheckException(
@@ -143,10 +171,10 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
"so the available storage in the VG cannot be determined.".format(pool)
)
vg_name = match.groups()[0].replace("--", "-")
- vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name
+ vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name
# should return free space like " 12.00g" if the VG exists; empty if it does not
- ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars)
+ ret = self.execute_module("command", {"_raw_params": vgs_cmd})
if ret.get("failed") or ret.get("rc", 0) != 0:
raise OpenShiftCheckException(
"Is LVM installed? Failed to run /sbin/vgs "
@@ -163,7 +191,8 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
return size
@staticmethod
- def _convert_to_bytes(string):
+ def convert_to_bytes(string):
+ """Convert string like "10.3 G" to bytes (binary units assumed). Return: float bytes."""
units = dict(
b=1,
k=1024,
@@ -183,3 +212,87 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
raise ValueError("Cannot convert to a byte size: " + string)
return float(number) * multiplier
+
+ def check_overlay_support(self, docker_info, driver_status):
+ """Check if overlay storage driver is supported for this host. Return: result dict."""
+ # check for xfs as backing store
+ backing_fs = driver_status.get("Backing Filesystem", "[NONE]")
+ if backing_fs != "xfs":
+ msg = (
+ "Docker storage drivers 'overlay' and 'overlay2' are only supported with\n"
+ "'xfs' as the backing storage, but this host's storage is type '{fs}'."
+ ).format(fs=backing_fs)
+ return {"failed": True, "msg": msg}
+
+ # check support for OS and kernel version
+ o_s = docker_info.get("OperatingSystem", "[NONE]")
+ if "Red Hat Enterprise Linux" in o_s or "CentOS" in o_s:
+ # keep it simple, only check enterprise kernel versions; assume everyone else is good
+ kernel = docker_info.get("KernelVersion", "[NONE]")
+ kernel_arr = [int(num) for num in re.findall(r'\d+', kernel)]
+ if kernel_arr < [3, 10, 0, 514]: # rhel < 7.3
+ msg = (
+ "Docker storage drivers 'overlay' and 'overlay2' are only supported beginning with\n"
+ "kernel version 3.10.0-514; but Docker reports kernel version {version}."
+ ).format(version=kernel)
+ return {"failed": True, "msg": msg}
+ # NOTE: we could check for --selinux-enabled here but docker won't even start with
+ # that option until it's supported in the kernel so we don't need to.
+
+ return self.check_overlay_usage(docker_info)
+
+ def check_overlay_usage(self, docker_info):
+ """Check disk usage on OverlayFS backing store volume. Return: result dict."""
+ path = docker_info.get("DockerRootDir", "/var/lib/docker") + "/" + docker_info["Driver"]
+
+ threshold = self.get_var("max_overlay_usage_percent", default=self.max_overlay_usage_percent)
+ try:
+ threshold = float(threshold)
+ except ValueError:
+ return {
+ "failed": True,
+ "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold),
+ }
+
+ mount = self.find_ansible_mount(path, self.get_var("ansible_mounts"))
+ try:
+ free_bytes = mount['size_available']
+ total_bytes = mount['size_total']
+ usage = 100.0 * (total_bytes - free_bytes) / total_bytes
+ except (KeyError, ZeroDivisionError):
+ return {
+ "failed": True,
+ "msg": "The ansible_mount found for path {} is invalid.\n"
+ "This is likely to be an Ansible bug. The record was:\n"
+ "{}".format(path, json.dumps(mount, indent=2)),
+ }
+
+ if usage > threshold:
+ return {
+ "failed": True,
+ "msg": (
+ "For Docker OverlayFS mount point {path},\n"
+ "usage percentage {pct:.1f} is higher than threshold {thresh:.1f}."
+ ).format(path=mount["mount"], pct=usage, thresh=threshold)
+ }
+
+ return {}
+
+ # TODO(lmeyer): migrate to base class
+ @staticmethod
+ def find_ansible_mount(path, ansible_mounts):
+ """Return the mount point for path from ansible_mounts."""
+
+ mount_for_path = {mount['mount']: mount for mount in ansible_mounts}
+ mount_point = path
+ while mount_point not in mount_for_path:
+ if mount_point in ["/", ""]: # "/" not in ansible_mounts???
+ break
+ mount_point = os.path.dirname(mount_point)
+
+ try:
+ return mount_for_path[mount_point]
+ except KeyError:
+ known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none'
+ msg = 'Unable to determine mount point for path "{}". Known mount points: {}.'
+ raise OpenShiftCheckException(msg.format(path, known_mounts))
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
index c04a69765..28c38504d 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
@@ -2,7 +2,7 @@
Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster.
"""
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
class EtcdImageDataSize(OpenShiftCheck):
@@ -11,24 +11,25 @@ class EtcdImageDataSize(OpenShiftCheck):
name = "etcd_imagedata_size"
tags = ["etcd"]
- def run(self, tmp, task_vars):
- etcd_mountpath = self._get_etcd_mountpath(get_var(task_vars, "ansible_mounts"))
+ def run(self):
+ etcd_mountpath = self._get_etcd_mountpath(self.get_var("ansible_mounts"))
etcd_avail_diskspace = etcd_mountpath["size_available"]
etcd_total_diskspace = etcd_mountpath["size_total"]
- etcd_imagedata_size_limit = get_var(task_vars,
- "etcd_max_image_data_size_bytes",
- default=int(0.5 * float(etcd_total_diskspace - etcd_avail_diskspace)))
+ etcd_imagedata_size_limit = self.get_var(
+ "etcd_max_image_data_size_bytes",
+ default=int(0.5 * float(etcd_total_diskspace - etcd_avail_diskspace))
+ )
- etcd_is_ssl = get_var(task_vars, "openshift", "master", "etcd_use_ssl", default=False)
- etcd_port = get_var(task_vars, "openshift", "master", "etcd_port", default=2379)
- etcd_hosts = get_var(task_vars, "openshift", "master", "etcd_hosts")
+ etcd_is_ssl = self.get_var("openshift", "master", "etcd_use_ssl", default=False)
+ etcd_port = self.get_var("openshift", "master", "etcd_port", default=2379)
+ etcd_hosts = self.get_var("openshift", "master", "etcd_hosts")
- config_base = get_var(task_vars, "openshift", "common", "config_base")
+ config_base = self.get_var("openshift", "common", "config_base")
- cert = task_vars.get("etcd_client_cert", config_base + "/master/master.etcd-client.crt")
- key = task_vars.get("etcd_client_key", config_base + "/master/master.etcd-client.key")
- ca_cert = task_vars.get("etcd_client_ca_cert", config_base + "/master/master.etcd-ca.crt")
+ cert = self.get_var("etcd_client_cert", default=config_base + "/master/master.etcd-client.crt")
+ key = self.get_var("etcd_client_key", default=config_base + "/master/master.etcd-client.key")
+ ca_cert = self.get_var("etcd_client_ca_cert", default=config_base + "/master/master.etcd-ca.crt")
for etcd_host in list(etcd_hosts):
args = {
@@ -46,7 +47,7 @@ class EtcdImageDataSize(OpenShiftCheck):
},
}
- etcdkeysize = self.module_executor("etcdkeysize", args, task_vars)
+ etcdkeysize = self.execute_module("etcdkeysize", args)
if etcdkeysize.get("rc", 0) != 0 or etcdkeysize.get("failed"):
msg = 'Failed to retrieve stats for etcd host "{host}": {reason}'
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
new file mode 100644
index 000000000..cc1b14d8a
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
@@ -0,0 +1,44 @@
+"""Check that scans journalctl for messages that are a symptom of increased etcd traffic."""
+
+from openshift_checks import OpenShiftCheck
+
+
+class EtcdTraffic(OpenShiftCheck):
+ """Check if host is being affected by an increase in etcd traffic."""
+
+ name = "etcd_traffic"
+ tags = ["health", "etcd"]
+
+ def is_active(self):
+ """Skip hosts that do not have etcd in their group names."""
+ group_names = self.get_var("group_names", default=[])
+ valid_group_names = "etcd" in group_names
+
+ version = self.get_var("openshift", "common", "short_version")
+ valid_version = version in ("3.4", "3.5", "1.4", "1.5")
+
+ return super(EtcdTraffic, self).is_active() and valid_group_names and valid_version
+
+ def run(self):
+ is_containerized = self.get_var("openshift", "common", "is_containerized")
+ unit = "etcd_container" if is_containerized else "etcd"
+
+ log_matchers = [{
+ "start_regexp": r"Starting Etcd Server",
+ "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
+ "unit": unit
+ }]
+
+ match = self.execute_module("search_journalctl", {"log_matchers": log_matchers})
+
+ if match.get("matched"):
+ msg = ("Higher than normal etcd traffic detected.\n"
+ "OpenShift 3.4 introduced an increase in etcd traffic.\n"
+ "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
+ "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
+ return {"failed": True, "msg": msg}
+
+ if match.get("failed"):
+ return {"failed": True, "msg": "\n".join(match.get("errors"))}
+
+ return {}
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
index 7452c9cc1..da7d0364a 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
@@ -1,6 +1,6 @@
"""A health check for OpenShift clusters."""
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
class EtcdVolume(OpenShiftCheck):
@@ -14,21 +14,18 @@ class EtcdVolume(OpenShiftCheck):
# Where to find ectd data, higher priority first.
supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
- @classmethod
- def is_active(cls, task_vars):
- etcd_hosts = get_var(task_vars, "groups", "etcd", default=[]) or get_var(task_vars, "groups", "masters",
- default=[]) or []
- is_etcd_host = get_var(task_vars, "ansible_ssh_host") in etcd_hosts
- return super(EtcdVolume, cls).is_active(task_vars) and is_etcd_host
+ def is_active(self):
+ etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or []
+ is_etcd_host = self.get_var("ansible_ssh_host") in etcd_hosts
+ return super(EtcdVolume, self).is_active() and is_etcd_host
- def run(self, tmp, task_vars):
- mount_info = self._etcd_mount_info(task_vars)
+ def run(self):
+ mount_info = self._etcd_mount_info()
available = mount_info["size_available"]
total = mount_info["size_total"]
used = total - available
- threshold = get_var(
- task_vars,
+ threshold = self.get_var(
"etcd_device_usage_threshold_percent",
default=self.default_threshold_percent
)
@@ -45,8 +42,8 @@ class EtcdVolume(OpenShiftCheck):
return {"changed": False}
- def _etcd_mount_info(self, task_vars):
- ansible_mounts = get_var(task_vars, "ansible_mounts")
+ def _etcd_mount_info(self):
+ ansible_mounts = self.get_var("ansible_mounts")
mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
for path in self.supported_mount_paths:
diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py
index c9fc59896..f82ae64d7 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/curator.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py
@@ -1,28 +1,21 @@
-"""
-Module for performing checks on an Curator logging deployment
-"""
+"""Check for an aggregated logging Curator deployment"""
-from openshift_checks import get_var
from openshift_checks.logging.logging import LoggingCheck
class Curator(LoggingCheck):
- """Module that checks an integrated logging Curator deployment"""
+ """Check for an aggregated logging Curator deployment"""
name = "curator"
tags = ["health", "logging"]
logging_namespace = None
- def run(self, tmp, task_vars):
- """Check various things and gather errors. Returns: result as hash"""
-
- self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ def run(self):
+ self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
curator_pods, error = super(Curator, self).get_pods_for_component(
- self.module_executor,
self.logging_namespace,
"curator",
- task_vars
)
if error:
return {"failed": True, "changed": False, "msg": error}
diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
index 01cb35b81..1e478c04d 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
@@ -1,35 +1,30 @@
-"""
-Module for performing checks on an Elasticsearch logging deployment
-"""
+"""Check for an aggregated logging Elasticsearch deployment"""
import json
import re
-from openshift_checks import get_var
from openshift_checks.logging.logging import LoggingCheck
class Elasticsearch(LoggingCheck):
- """Module that checks an integrated logging Elasticsearch deployment"""
+ """Check for an aggregated logging Elasticsearch deployment"""
name = "elasticsearch"
tags = ["health", "logging"]
logging_namespace = None
- def run(self, tmp, task_vars):
+ def run(self):
"""Check various things and gather errors. Returns: result as hash"""
- self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
es_pods, error = super(Elasticsearch, self).get_pods_for_component(
- self.execute_module,
self.logging_namespace,
"es",
- task_vars,
)
if error:
return {"failed": True, "changed": False, "msg": error}
- check_error = self.check_elasticsearch(es_pods, task_vars)
+ check_error = self.check_elasticsearch(es_pods)
if check_error:
msg = ("The following Elasticsearch deployment issue was found:"
@@ -41,7 +36,7 @@ class Elasticsearch(LoggingCheck):
return {"failed": False, "changed": False, "msg": 'No problems found with Elasticsearch deployment.'}
def _not_running_elasticsearch_pods(self, es_pods):
- """Returns: list of running pods, list of errors about non-running pods"""
+ """Returns: list of pods that are not running, list of errors about non-running pods"""
not_running = super(Elasticsearch, self).not_running_pods(es_pods)
if not_running:
return not_running, [(
@@ -54,7 +49,7 @@ class Elasticsearch(LoggingCheck):
))]
return not_running, []
- def check_elasticsearch(self, es_pods, task_vars):
+ def check_elasticsearch(self, es_pods):
"""Various checks for elasticsearch. Returns: error string"""
not_running_pods, error_msgs = self._not_running_elasticsearch_pods(es_pods)
running_pods = [pod for pod in es_pods if pod not in not_running_pods]
@@ -65,10 +60,10 @@ class Elasticsearch(LoggingCheck):
}
if not pods_by_name:
return 'No logging Elasticsearch pods were found. Is logging deployed?'
- error_msgs += self._check_elasticsearch_masters(pods_by_name, task_vars)
- error_msgs += self._check_elasticsearch_node_list(pods_by_name, task_vars)
- error_msgs += self._check_es_cluster_health(pods_by_name, task_vars)
- error_msgs += self._check_elasticsearch_diskspace(pods_by_name, task_vars)
+ error_msgs += self._check_elasticsearch_masters(pods_by_name)
+ error_msgs += self._check_elasticsearch_node_list(pods_by_name)
+ error_msgs += self._check_es_cluster_health(pods_by_name)
+ error_msgs += self._check_elasticsearch_diskspace(pods_by_name)
return '\n'.join(error_msgs)
@staticmethod
@@ -76,14 +71,14 @@ class Elasticsearch(LoggingCheck):
base = "exec {name} -- curl -s --cert {base}cert --key {base}key --cacert {base}ca -XGET '{url}'"
return base.format(base="/etc/elasticsearch/secret/admin-", name=pod_name, url=url)
- def _check_elasticsearch_masters(self, pods_by_name, task_vars):
+ def _check_elasticsearch_masters(self, pods_by_name):
"""Check that Elasticsearch masters are sane. Returns: list of error strings"""
es_master_names = set()
error_msgs = []
for pod_name in pods_by_name.keys():
# Compare what each ES node reports as master and compare for split brain
get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master")
- master_name_str = self._exec_oc(get_master_cmd, [], task_vars)
+ master_name_str = self._exec_oc(get_master_cmd, [])
master_names = (master_name_str or '').split(' ')
if len(master_names) > 1:
es_master_names.add(master_names[1])
@@ -108,7 +103,7 @@ class Elasticsearch(LoggingCheck):
return error_msgs
- def _check_elasticsearch_node_list(self, pods_by_name, task_vars):
+ def _check_elasticsearch_node_list(self, pods_by_name):
"""Check that reported ES masters are accounted for by pods. Returns: list of error strings"""
if not pods_by_name:
@@ -116,7 +111,7 @@ class Elasticsearch(LoggingCheck):
# get ES cluster nodes
node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes')
- cluster_node_data = self._exec_oc(node_cmd, [], task_vars)
+ cluster_node_data = self._exec_oc(node_cmd, [])
try:
cluster_nodes = json.loads(cluster_node_data)['nodes']
except (ValueError, KeyError):
@@ -138,12 +133,12 @@ class Elasticsearch(LoggingCheck):
return error_msgs
- def _check_es_cluster_health(self, pods_by_name, task_vars):
+ def _check_es_cluster_health(self, pods_by_name):
"""Exec into the elasticsearch pods and check the cluster health. Returns: list of errors"""
error_msgs = []
for pod_name in pods_by_name.keys():
cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true')
- cluster_health_data = self._exec_oc(cluster_health_cmd, [], task_vars)
+ cluster_health_data = self._exec_oc(cluster_health_cmd, [])
try:
health_res = json.loads(cluster_health_data)
if not health_res or not health_res.get('status'):
@@ -162,7 +157,7 @@ class Elasticsearch(LoggingCheck):
return error_msgs
- def _check_elasticsearch_diskspace(self, pods_by_name, task_vars):
+ def _check_elasticsearch_diskspace(self, pods_by_name):
"""
Exec into an ES pod and query the diskspace on the persistent volume.
Returns: list of errors
@@ -170,7 +165,7 @@ class Elasticsearch(LoggingCheck):
error_msgs = []
for pod_name in pods_by_name.keys():
df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
- disk_output = self._exec_oc(df_cmd, [], task_vars)
+ disk_output = self._exec_oc(df_cmd, [])
lines = disk_output.splitlines()
# expecting one header looking like 'IUse% Use%' and one body line
body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$'
@@ -182,7 +177,7 @@ class Elasticsearch(LoggingCheck):
continue
inode_pct, disk_pct = re.match(body_re, lines[1]).groups()
- inode_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_inode_pct', default='90')
+ inode_pct_thresh = self.get_var('openshift_check_efk_es_inode_pct', default='90')
if int(inode_pct) >= int(inode_pct_thresh):
error_msgs.append(
'Inode percent usage on the storage volume for logging ES pod "{pod}"\n'
@@ -193,7 +188,7 @@ class Elasticsearch(LoggingCheck):
limit=str(inode_pct_thresh),
param='openshift_check_efk_es_inode_pct',
))
- disk_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_storage_pct', default='80')
+ disk_pct_thresh = self.get_var('openshift_check_efk_es_storage_pct', default='80')
if int(disk_pct) >= int(disk_pct_thresh):
error_msgs.append(
'Disk percent usage on the storage volume for logging ES pod "{pod}"\n'
@@ -207,11 +202,9 @@ class Elasticsearch(LoggingCheck):
return error_msgs
- def _exec_oc(self, cmd_str, extra_args, task_vars):
+ def _exec_oc(self, cmd_str, extra_args):
return super(Elasticsearch, self).exec_oc(
- self.execute_module,
self.logging_namespace,
cmd_str,
extra_args,
- task_vars,
)
diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
index 627567293..063e707a9 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
@@ -1,33 +1,29 @@
-"""
-Module for performing checks on an Fluentd logging deployment
-"""
+"""Check for an aggregated logging Fluentd deployment"""
import json
-from openshift_checks import get_var
from openshift_checks.logging.logging import LoggingCheck
class Fluentd(LoggingCheck):
- """Module that checks an integrated logging Fluentd deployment"""
+ """Check for an aggregated logging Fluentd deployment"""
+
name = "fluentd"
tags = ["health", "logging"]
logging_namespace = None
- def run(self, tmp, task_vars):
+ def run(self):
"""Check various things and gather errors. Returns: result as hash"""
- self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
fluentd_pods, error = super(Fluentd, self).get_pods_for_component(
- self.execute_module,
self.logging_namespace,
"fluentd",
- task_vars,
)
if error:
return {"failed": True, "changed": False, "msg": error}
- check_error = self.check_fluentd(fluentd_pods, task_vars)
+ check_error = self.check_fluentd(fluentd_pods)
if check_error:
msg = ("The following Fluentd deployment issue was found:"
@@ -53,10 +49,9 @@ class Fluentd(LoggingCheck):
).format(label=node_selector)
return fluentd_nodes, None
- @staticmethod
- def _check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars):
+ def _check_node_labeling(self, nodes_by_name, fluentd_nodes, node_selector):
"""Note if nodes are not labeled as expected. Returns: error string"""
- intended_nodes = get_var(task_vars, 'openshift_logging_fluentd_hosts', default=['--all'])
+ intended_nodes = self.get_var('openshift_logging_fluentd_hosts', default=['--all'])
if not intended_nodes or '--all' in intended_nodes:
intended_nodes = nodes_by_name.keys()
nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys())
@@ -114,13 +109,15 @@ class Fluentd(LoggingCheck):
))
return None
- def check_fluentd(self, pods, task_vars):
+ def check_fluentd(self, pods):
"""Verify fluentd is running everywhere. Returns: error string"""
- node_selector = get_var(task_vars, 'openshift_logging_fluentd_nodeselector',
- default='logging-infra-fluentd=true')
+ node_selector = self.get_var(
+ 'openshift_logging_fluentd_nodeselector',
+ default='logging-infra-fluentd=true'
+ )
- nodes_by_name, error = self.get_nodes_by_name(task_vars)
+ nodes_by_name, error = self.get_nodes_by_name()
if error:
return error
@@ -129,7 +126,7 @@ class Fluentd(LoggingCheck):
return error
error_msgs = []
- error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars)
+ error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector)
if error:
error_msgs.append(error)
error = self._check_nodes_have_fluentd(pods, fluentd_nodes)
@@ -148,9 +145,9 @@ class Fluentd(LoggingCheck):
return '\n'.join(error_msgs)
- def get_nodes_by_name(self, task_vars):
+ def get_nodes_by_name(self):
"""Retrieve all the node definitions. Returns: dict(name: node), error string"""
- nodes_json = self._exec_oc("get nodes -o json", [], task_vars)
+ nodes_json = self._exec_oc("get nodes -o json", [])
try:
nodes = json.loads(nodes_json)
except ValueError: # no valid json - should not happen
@@ -162,9 +159,9 @@ class Fluentd(LoggingCheck):
for node in nodes['items']
}, None
- def _exec_oc(self, cmd_str, extra_args, task_vars):
- return super(Fluentd, self).exec_oc(self.execute_module,
- self.logging_namespace,
- cmd_str,
- extra_args,
- task_vars)
+ def _exec_oc(self, cmd_str, extra_args):
+ return super(Fluentd, self).exec_oc(
+ self.logging_namespace,
+ cmd_str,
+ extra_args,
+ )
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
index 442f407b1..60f94e106 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -12,7 +12,6 @@ except ImportError:
from urllib.error import HTTPError, URLError
import urllib.request as urllib2
-from openshift_checks import get_var
from openshift_checks.logging.logging import LoggingCheck
@@ -24,22 +23,20 @@ class Kibana(LoggingCheck):
logging_namespace = None
- def run(self, tmp, task_vars):
+ def run(self):
"""Check various things and gather errors. Returns: result as hash"""
- self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
kibana_pods, error = super(Kibana, self).get_pods_for_component(
- self.execute_module,
self.logging_namespace,
"kibana",
- task_vars,
)
if error:
return {"failed": True, "changed": False, "msg": error}
check_error = self.check_kibana(kibana_pods)
if not check_error:
- check_error = self._check_kibana_route(task_vars)
+ check_error = self._check_kibana_route()
if check_error:
msg = ("The following Kibana deployment issue was found:"
@@ -50,7 +47,7 @@ class Kibana(LoggingCheck):
# TODO(lmeyer): run it all again for the ops cluster
return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'}
- def _verify_url_internal(self, url, task_vars):
+ def _verify_url_internal(self, url):
"""
Try to reach a URL from the host.
Returns: success (bool), reason (for failure)
@@ -62,7 +59,7 @@ class Kibana(LoggingCheck):
# TODO(lmeyer): give users option to validate certs
status_code=302,
)
- result = self.execute_module('uri', args, task_vars)
+ result = self.execute_module('uri', args)
if result.get('failed'):
return result['msg']
return None
@@ -114,14 +111,14 @@ class Kibana(LoggingCheck):
return None
- def _get_kibana_url(self, task_vars):
+ def _get_kibana_url(self):
"""
Get kibana route or report error.
Returns: url (or empty), reason for failure
"""
# Get logging url
- get_route = self._exec_oc("get route logging-kibana -o json", [], task_vars)
+ get_route = self._exec_oc("get route logging-kibana -o json", [])
if not get_route:
return None, 'no_route_exists'
@@ -139,7 +136,7 @@ class Kibana(LoggingCheck):
return 'https://{}/'.format(host), None
- def _check_kibana_route(self, task_vars):
+ def _check_kibana_route(self):
"""
Check to see if kibana route is up and working.
Returns: error string
@@ -160,12 +157,12 @@ class Kibana(LoggingCheck):
),
)
- kibana_url, error = self._get_kibana_url(task_vars)
+ kibana_url, error = self._get_kibana_url()
if not kibana_url:
return known_errors.get(error, error)
# first, check that kibana is reachable from the master.
- error = self._verify_url_internal(kibana_url, task_vars)
+ error = self._verify_url_internal(kibana_url)
if error:
if 'urlopen error [Errno 111] Connection refused' in error:
error = (
@@ -190,7 +187,7 @@ class Kibana(LoggingCheck):
# in production we would like the kibana route to work from outside the
# cluster too; but that may not be the case, so allow disabling just this part.
- if not get_var(task_vars, "openshift_check_efk_kibana_external", default=True):
+ if not self.get_var("openshift_check_efk_kibana_external", default=True):
return None
error = self._verify_url_external(kibana_url)
if error:
@@ -221,9 +218,9 @@ class Kibana(LoggingCheck):
return error
return None
- def _exec_oc(self, cmd_str, extra_args, task_vars):
- return super(Kibana, self).exec_oc(self.execute_module,
- self.logging_namespace,
- cmd_str,
- extra_args,
- task_vars)
+ def _exec_oc(self, cmd_str, extra_args):
+ return super(Kibana, self).exec_oc(
+ self.logging_namespace,
+ cmd_str,
+ extra_args,
+ )
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index 05b4d300c..a48e1c728 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -5,39 +5,36 @@ Util functions for performing checks on an Elasticsearch, Fluentd, and Kibana st
import json
import os
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
class LoggingCheck(OpenShiftCheck):
- """Base class for logging component checks"""
+ """Base class for OpenShift aggregated logging component checks"""
name = "logging"
+ logging_namespace = "logging"
- @classmethod
- def is_active(cls, task_vars):
- return super(LoggingCheck, cls).is_active(task_vars) and cls.is_first_master(task_vars)
+ def is_active(self):
+ logging_deployed = self.get_var("openshift_hosted_logging_deploy", default=False)
+ return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master()
- @staticmethod
- def is_first_master(task_vars):
- """Run only on first master and only when logging is configured. Returns: bool"""
- logging_deployed = get_var(task_vars, "openshift_hosted_logging_deploy", default=True)
+ def is_first_master(self):
+ """Determine if running on first master. Returns: bool"""
# Note: It would be nice to use membership in oo_first_master group, however for now it
# seems best to avoid requiring that setup and just check this is the first master.
- hostname = get_var(task_vars, "ansible_ssh_host") or [None]
- masters = get_var(task_vars, "groups", "masters", default=None) or [None]
- return logging_deployed and masters[0] == hostname
+ hostname = self.get_var("ansible_ssh_host") or [None]
+ masters = self.get_var("groups", "masters", default=None) or [None]
+ return masters[0] == hostname
- def run(self, tmp, task_vars):
+ def run(self):
pass
- def get_pods_for_component(self, execute_module, namespace, logging_component, task_vars):
+ def get_pods_for_component(self, namespace, logging_component):
"""Get all pods for a given component. Returns: list of pods for component, error string"""
pod_output = self.exec_oc(
- execute_module,
namespace,
"get pods -l component={} -o json".format(logging_component),
[],
- task_vars
)
try:
pods = json.loads(pod_output)
@@ -45,7 +42,7 @@ class LoggingCheck(OpenShiftCheck):
raise ValueError()
except ValueError:
# successful run but non-parsing data generally means there were no pods in the namespace
- return None, 'There are no pods in the {} namespace. Is logging deployed?'.format(namespace)
+ return None, 'No pods were found for the "{}" logging component.'.format(logging_component)
return pods['items'], None
@@ -54,23 +51,22 @@ class LoggingCheck(OpenShiftCheck):
"""Returns: list of pods not in a ready and running state"""
return [
pod for pod in pods
- if any(
+ if not pod.get("status", {}).get("containerStatuses") or any(
container['ready'] is False
for container in pod['status']['containerStatuses']
) or not any(
condition['type'] == 'Ready' and condition['status'] == 'True'
- for condition in pod['status']['conditions']
+ for condition in pod['status'].get('conditions', [])
)
]
- @staticmethod
- def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None):
+ def exec_oc(self, namespace="logging", cmd_str="", extra_args=None):
"""
Execute an 'oc' command in the remote host.
Returns: output of command and namespace,
or raises OpenShiftCheckException on error
"""
- config_base = get_var(task_vars, "openshift", "common", "config_base")
+ config_base = self.get_var("openshift", "common", "config_base")
args = {
"namespace": namespace,
"config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
@@ -78,7 +74,7 @@ class LoggingCheck(OpenShiftCheck):
"extra_args": list(extra_args) if extra_args else [],
}
- result = execute_module("ocutil", args, task_vars)
+ result = self.execute_module("ocutil", args)
if result.get("failed"):
msg = (
'Unexpected error using `oc` to validate the logging stack components.\n'
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
new file mode 100644
index 000000000..b24e88e05
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
@@ -0,0 +1,130 @@
+"""
+Check for ensuring logs from pods can be queried in a reasonable amount of time.
+"""
+
+import json
+import time
+
+from uuid import uuid4
+
+from openshift_checks import OpenShiftCheckException
+from openshift_checks.logging.logging import LoggingCheck
+
+
+ES_CMD_TIMEOUT_SECONDS = 30
+
+
+class LoggingIndexTime(LoggingCheck):
+ """Check that pod logs are aggregated and indexed in ElasticSearch within a reasonable amount of time."""
+ name = "logging_index_time"
+ tags = ["health", "logging"]
+
+ logging_namespace = "logging"
+
+ def run(self):
+ """Add log entry by making unique request to Kibana. Check for unique entry in the ElasticSearch pod logs."""
+ try:
+ log_index_timeout = int(
+ self.get_var("openshift_check_logging_index_timeout_seconds", default=ES_CMD_TIMEOUT_SECONDS)
+ )
+ except ValueError:
+ return {
+ "failed": True,
+ "msg": ('Invalid value provided for "openshift_check_logging_index_timeout_seconds". '
+ 'Value must be an integer representing an amount in seconds.'),
+ }
+
+ running_component_pods = dict()
+
+ # get all component pods
+ self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace)
+ for component, name in (['kibana', 'Kibana'], ['es', 'Elasticsearch']):
+ pods, error = self.get_pods_for_component(self.logging_namespace, component)
+
+ if error:
+ msg = 'Unable to retrieve pods for the {} logging component: {}'
+ return {"failed": True, "changed": False, "msg": msg.format(name, error)}
+
+ running_pods = self.running_pods(pods)
+
+ if not running_pods:
+ msg = ('No {} pods in the "Running" state were found.'
+ 'At least one pod is required in order to perform this check.')
+ return {"failed": True, "changed": False, "msg": msg.format(name)}
+
+ running_component_pods[component] = running_pods
+
+ uuid = self.curl_kibana_with_uuid(running_component_pods["kibana"][0])
+ self.wait_until_cmd_or_err(running_component_pods["es"][0], uuid, log_index_timeout)
+ return {}
+
+ def wait_until_cmd_or_err(self, es_pod, uuid, timeout_secs):
+ """Retry an Elasticsearch query every second until query success, or a defined
+ length of time has passed."""
+ deadline = time.time() + timeout_secs
+ interval = 1
+ while not self.query_es_from_es(es_pod, uuid):
+ if time.time() + interval > deadline:
+ msg = "expecting match in Elasticsearch for message with uuid {}, but no matches were found after {}s."
+ raise OpenShiftCheckException(msg.format(uuid, timeout_secs))
+ time.sleep(interval)
+
+ def curl_kibana_with_uuid(self, kibana_pod):
+ """curl Kibana with a unique uuid."""
+ uuid = self.generate_uuid()
+ pod_name = kibana_pod["metadata"]["name"]
+ exec_cmd = "exec {pod_name} -c kibana -- curl --max-time 30 -s http://localhost:5601/{uuid}"
+ exec_cmd = exec_cmd.format(pod_name=pod_name, uuid=uuid)
+
+ error_str = self.exec_oc(self.logging_namespace, exec_cmd, [])
+
+ try:
+ error_code = json.loads(error_str)["statusCode"]
+ except KeyError:
+ msg = ('invalid response returned from Kibana request (Missing "statusCode" key):\n'
+ 'Command: {}\nResponse: {}').format(exec_cmd, error_str)
+ raise OpenShiftCheckException(msg)
+ except ValueError:
+ msg = ('invalid response returned from Kibana request (Non-JSON output):\n'
+ 'Command: {}\nResponse: {}').format(exec_cmd, error_str)
+ raise OpenShiftCheckException(msg)
+
+ if error_code != 404:
+ msg = 'invalid error code returned from Kibana request. Expecting error code "404", but got "{}" instead.'
+ raise OpenShiftCheckException(msg.format(error_code))
+
+ return uuid
+
+ def query_es_from_es(self, es_pod, uuid):
+ """curl the Elasticsearch pod and look for a unique uuid in its logs."""
+ pod_name = es_pod["metadata"]["name"]
+ exec_cmd = (
+ "exec {pod_name} -- curl --max-time 30 -s -f "
+ "--cacert /etc/elasticsearch/secret/admin-ca "
+ "--cert /etc/elasticsearch/secret/admin-cert "
+ "--key /etc/elasticsearch/secret/admin-key "
+ "https://logging-es:9200/project.{namespace}*/_count?q=message:{uuid}"
+ )
+ exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace, uuid=uuid)
+ result = self.exec_oc(self.logging_namespace, exec_cmd, [])
+
+ try:
+ count = json.loads(result)["count"]
+ except KeyError:
+ msg = 'invalid response from Elasticsearch query:\n"{}"\nMissing "count" key:\n{}'
+ raise OpenShiftCheckException(msg.format(exec_cmd, result))
+ except ValueError:
+ msg = 'invalid response from Elasticsearch query:\n"{}"\nNon-JSON output:\n{}'
+ raise OpenShiftCheckException(msg.format(exec_cmd, result))
+
+ return count
+
+ @staticmethod
+ def running_pods(pods):
+ """Filter pods that are running."""
+ return [pod for pod in pods if pod['status']['phase'] == 'Running']
+
+ @staticmethod
+ def generate_uuid():
+ """Wrap uuid generator. Allows for testing with expected values."""
+ return str(uuid4())
diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py
index f4e31065f..765ba072d 100644
--- a/roles/openshift_health_checker/openshift_checks/memory_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py
@@ -1,5 +1,5 @@
-# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, get_var
+"""Check that recommended memory is available."""
+from openshift_checks import OpenShiftCheck
MIB = 2**20
GIB = 2**30
@@ -21,19 +21,18 @@ class MemoryAvailability(OpenShiftCheck):
# https://access.redhat.com/solutions/3006511 physical RAM is partly reserved from memtotal
memtotal_adjustment = 1 * GIB
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Skip hosts that do not have recommended memory requirements."""
- group_names = get_var(task_vars, "group_names", default=[])
- has_memory_recommendation = bool(set(group_names).intersection(cls.recommended_memory_bytes))
- return super(MemoryAvailability, cls).is_active(task_vars) and has_memory_recommendation
+ group_names = self.get_var("group_names", default=[])
+ has_memory_recommendation = bool(set(group_names).intersection(self.recommended_memory_bytes))
+ return super(MemoryAvailability, self).is_active() and has_memory_recommendation
- def run(self, tmp, task_vars):
- group_names = get_var(task_vars, "group_names")
- total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * MIB
+ def run(self):
+ group_names = self.get_var("group_names")
+ total_memory_bytes = self.get_var("ansible_memtotal_mb") * MIB
recommended_min = max(self.recommended_memory_bytes.get(name, 0) for name in group_names)
- configured_min = float(get_var(task_vars, "openshift_check_min_host_memory_gb", default=0)) * GIB
+ configured_min = float(self.get_var("openshift_check_min_host_memory_gb", default=0)) * GIB
min_memory_bytes = configured_min or recommended_min
if total_memory_bytes + self.memtotal_adjustment < min_memory_bytes:
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index 7f3d78cc4..3b2c64e6a 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -2,19 +2,16 @@
Mixin classes meant to be used with subclasses of OpenShiftCheck.
"""
-from openshift_checks import get_var
-
class NotContainerizedMixin(object):
"""Mixin for checks that are only active when not in containerized mode."""
# permanent # pylint: disable=too-few-public-methods
# Reason: The mixin is not intended to stand on its own as a class.
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Only run on non-containerized hosts."""
- is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
- return super(NotContainerizedMixin, cls).is_active(task_vars) and not is_containerized
+ is_containerized = self.get_var("openshift", "common", "is_containerized")
+ return super(NotContainerizedMixin, self).is_active() and not is_containerized
class DockerHostMixin(object):
@@ -22,26 +19,27 @@ class DockerHostMixin(object):
dependencies = []
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Only run on hosts that depend on Docker."""
- is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
- is_node = "nodes" in get_var(task_vars, "group_names", default=[])
- return super(DockerHostMixin, cls).is_active(task_vars) and (is_containerized or is_node)
+ is_containerized = self.get_var("openshift", "common", "is_containerized")
+ is_node = "nodes" in self.get_var("group_names", default=[])
+ return super(DockerHostMixin, self).is_active() and (is_containerized or is_node)
- def ensure_dependencies(self, task_vars):
+ def ensure_dependencies(self):
"""
Ensure that docker-related packages exist, but not on atomic hosts
(which would not be able to install but should already have them).
Returns: msg, failed, changed
"""
- if get_var(task_vars, "openshift", "common", "is_atomic"):
+ if self.get_var("openshift", "common", "is_atomic"):
return "", False, False
# NOTE: we would use the "package" module but it's actually an action plugin
# and it's not clear how to invoke one of those. This is about the same anyway:
- pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum")
- result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars)
+ result = self.execute_module(
+ self.get_var("ansible_pkg_mgr", default="yum"),
+ {"name": self.dependencies, "state": "present"},
+ )
msg = result.get("msg", "")
if result.get("failed"):
if "No package matching" in msg:
diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py
index 1e45ae3af..cd6ebd493 100644
--- a/roles/openshift_health_checker/openshift_checks/ovs_version.py
+++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py
@@ -3,7 +3,7 @@ Ansible module for determining if an installed version of Open vSwitch is incomp
currently installed version of OpenShift.
"""
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
from openshift_checks.mixins import NotContainerizedMixin
@@ -27,27 +27,26 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
"1": "3",
}
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Skip hosts that do not have package requirements."""
- group_names = get_var(task_vars, "group_names", default=[])
+ group_names = self.get_var("group_names", default=[])
master_or_node = 'masters' in group_names or 'nodes' in group_names
- return super(OvsVersion, cls).is_active(task_vars) and master_or_node
+ return super(OvsVersion, self).is_active() and master_or_node
- def run(self, tmp, task_vars):
+ def run(self):
args = {
"package_list": [
{
"name": "openvswitch",
- "version": self.get_required_ovs_version(task_vars),
+ "version": self.get_required_ovs_version(),
},
],
}
- return self.execute_module("rpm_version", args, task_vars)
+ return self.execute_module("rpm_version", args)
- def get_required_ovs_version(self, task_vars):
+ def get_required_ovs_version(self):
"""Return the correct Open vSwitch version for the current OpenShift version"""
- openshift_version = self._get_openshift_version(task_vars)
+ openshift_version = self._get_openshift_version()
if float(openshift_version) < 3.5:
return self.openshift_to_ovs_version["3.4"]
@@ -59,8 +58,8 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}"
raise OpenShiftCheckException(msg.format(openshift_version))
- def _get_openshift_version(self, task_vars):
- openshift_version = get_var(task_vars, "openshift_image_tag")
+ def _get_openshift_version(self):
+ openshift_version = self.get_var("openshift_image_tag")
if openshift_version and openshift_version[0] == 'v':
openshift_version = openshift_version[1:]
diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py
index a7eb720fd..a86180b00 100644
--- a/roles/openshift_health_checker/openshift_checks/package_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/package_availability.py
@@ -1,5 +1,6 @@
-# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, get_var
+"""Check that required RPM packages are available."""
+
+from openshift_checks import OpenShiftCheck
from openshift_checks.mixins import NotContainerizedMixin
@@ -9,13 +10,13 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
name = "package_availability"
tags = ["preflight"]
- @classmethod
- def is_active(cls, task_vars):
- return super(PackageAvailability, cls).is_active(task_vars) and task_vars["ansible_pkg_mgr"] == "yum"
+ def is_active(self):
+ """Run only when yum is the package manager as the code is specific to it."""
+ return super(PackageAvailability, self).is_active() and self.get_var("ansible_pkg_mgr") == "yum"
- def run(self, tmp, task_vars):
- rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
- group_names = get_var(task_vars, "group_names", default=[])
+ def run(self):
+ rpm_prefix = self.get_var("openshift", "common", "service_type")
+ group_names = self.get_var("group_names", default=[])
packages = set()
@@ -25,10 +26,11 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
packages.update(self.node_packages(rpm_prefix))
args = {"packages": sorted(set(packages))}
- return self.execute_module("check_yum_update", args, tmp, task_vars)
+ return self.execute_module("check_yum_update", args)
@staticmethod
def master_packages(rpm_prefix):
+ """Return a list of RPMs that we expect a master install to have available."""
return [
"{rpm_prefix}".format(rpm_prefix=rpm_prefix),
"{rpm_prefix}-clients".format(rpm_prefix=rpm_prefix),
@@ -36,8 +38,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
"bash-completion",
"cockpit-bridge",
"cockpit-docker",
- "cockpit-kubernetes",
- "cockpit-shell",
+ "cockpit-system",
"cockpit-ws",
"etcd",
"httpd-tools",
@@ -45,6 +46,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
@staticmethod
def node_packages(rpm_prefix):
+ """Return a list of RPMs that we expect a node install to have available."""
return [
"{rpm_prefix}".format(rpm_prefix=rpm_prefix),
"{rpm_prefix}-node".format(rpm_prefix=rpm_prefix),
diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py
index fd0c0a755..1e9aecbe0 100644
--- a/roles/openshift_health_checker/openshift_checks/package_update.py
+++ b/roles/openshift_health_checker/openshift_checks/package_update.py
@@ -1,14 +1,14 @@
-# pylint: disable=missing-docstring
+"""Check that a yum update would not run into conflicts with available packages."""
from openshift_checks import OpenShiftCheck
from openshift_checks.mixins import NotContainerizedMixin
class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):
- """Check that there are no conflicts in RPM packages."""
+ """Check that a yum update would not run into conflicts with available packages."""
name = "package_update"
tags = ["preflight"]
- def run(self, tmp, task_vars):
+ def run(self):
args = {"packages": []}
- return self.execute_module("check_yum_update", args, tmp, task_vars)
+ return self.execute_module("check_yum_update", args)
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 2e737818b..020786804 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -1,5 +1,5 @@
-# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+"""Check that available RPM packages match the required versions."""
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
from openshift_checks.mixins import NotContainerizedMixin
@@ -10,8 +10,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
tags = ["preflight"]
openshift_to_ovs_version = {
- "3.6": "2.6",
- "3.5": "2.6",
+ "3.6": ["2.6", "2.7"],
+ "3.5": ["2.6", "2.7"],
"3.4": "2.4",
}
@@ -28,29 +28,28 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
"1": "3",
}
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
"""Skip hosts that do not have package requirements."""
- group_names = get_var(task_vars, "group_names", default=[])
+ group_names = self.get_var("group_names", default=[])
master_or_node = 'masters' in group_names or 'nodes' in group_names
- return super(PackageVersion, cls).is_active(task_vars) and master_or_node
+ return super(PackageVersion, self).is_active() and master_or_node
- def run(self, tmp, task_vars):
- rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
- openshift_release = get_var(task_vars, "openshift_release", default='')
- deployment_type = get_var(task_vars, "openshift_deployment_type")
+ def run(self):
+ rpm_prefix = self.get_var("openshift", "common", "service_type")
+ openshift_release = self.get_var("openshift_release", default='')
+ deployment_type = self.get_var("openshift_deployment_type")
check_multi_minor_release = deployment_type in ['openshift-enterprise']
args = {
"package_list": [
{
"name": "openvswitch",
- "version": self.get_required_ovs_version(task_vars),
+ "version": self.get_required_ovs_version(),
"check_multi": False,
},
{
"name": "docker",
- "version": self.get_required_docker_version(task_vars),
+ "version": self.get_required_docker_version(),
"check_multi": False,
},
{
@@ -71,13 +70,13 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
],
}
- return self.execute_module("aos_version", args, tmp, task_vars)
+ return self.execute_module("aos_version", args)
- def get_required_ovs_version(self, task_vars):
+ def get_required_ovs_version(self):
"""Return the correct Open vSwitch version for the current OpenShift version.
If the current OpenShift version is >= 3.5, ensure Open vSwitch version 2.6,
Else ensure Open vSwitch version 2.4"""
- openshift_version = self.get_openshift_version(task_vars)
+ openshift_version = self.get_openshift_version()
if float(openshift_version) < 3.5:
return self.openshift_to_ovs_version["3.4"]
@@ -89,12 +88,12 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}"
raise OpenShiftCheckException(msg.format(openshift_version))
- def get_required_docker_version(self, task_vars):
+ def get_required_docker_version(self):
"""Return the correct Docker version for the current OpenShift version.
If the OpenShift version is 3.1, ensure Docker version 1.8.
If the OpenShift version is 3.2 or 3.3, ensure Docker version 1.10.
If the current OpenShift version is >= 3.4, ensure Docker version 1.12."""
- openshift_version = self.get_openshift_version(task_vars)
+ openshift_version = self.get_openshift_version()
if float(openshift_version) >= 3.4:
return self.openshift_to_docker_version["3.4"]
@@ -106,14 +105,16 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
msg = "There is no recommended version of Docker for the current version of OpenShift: {}"
raise OpenShiftCheckException(msg.format(openshift_version))
- def get_openshift_version(self, task_vars):
- openshift_version = get_var(task_vars, "openshift_image_tag")
+ def get_openshift_version(self):
+ """Return received image tag as a normalized X.Y minor version string."""
+ openshift_version = self.get_var("openshift_image_tag")
if openshift_version and openshift_version[0] == 'v':
openshift_version = openshift_version[1:]
return self.parse_version(openshift_version)
def parse_version(self, version):
+ """Return a normalized X.Y minor version string."""
components = version.split(".")
if not components or len(components) < 2:
msg = "An invalid version of OpenShift was found for this host: {}"
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index 6ebf0ebb2..2d068be3d 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -15,14 +15,13 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru
name = _name
tags = _tags or []
- def __init__(self, execute_module=None):
+ def __init__(self, execute_module=None, task_vars=None, tmp=None):
pass
- @classmethod
- def is_active(cls, task_vars):
+ def is_active(self):
return is_active
- def run(self, tmp, task_vars):
+ def run(self):
if run_exception is not None:
raise run_exception
return run_return
@@ -59,7 +58,7 @@ def failed(result, msg_has=None):
if msg_has is not None:
assert 'msg' in result
for term in msg_has:
- assert term in result['msg']
+ assert term.lower() in result['msg'].lower()
return result.get('failed', False)
@@ -124,7 +123,7 @@ def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch):
check_return_value = {'ok': 'test'}
check_class = fake_check(run_return=check_return_value)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -138,7 +137,7 @@ def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch):
check_return_value = {'ok': 'test', 'changed': True}
check_class = fake_check(run_return=check_return_value)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -152,7 +151,7 @@ def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch):
check_return_value = {'failed': True}
check_class = fake_check(run_return=check_return_value)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -167,7 +166,7 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
exception_msg = 'fake check has an exception'
run_exception = OpenShiftCheckException(exception_msg)
check_class = fake_check(run_exception=run_exception)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -178,6 +177,16 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
assert not skipped(result)
+def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch):
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {})
+
+ result = plugin.run(tmp=None, task_vars=task_vars)
+
+ assert failed(result, msg_has=['unknown', 'name'])
+ assert not changed(result)
+ assert not skipped(result)
+
+
@pytest.mark.parametrize('names,all_checks,expected', [
([], [], set()),
(
diff --git a/roles/openshift_health_checker/test/aos_version_test.py b/roles/openshift_health_checker/test/aos_version_test.py
index 697805dd2..4100f6c70 100644
--- a/roles/openshift_health_checker/test/aos_version_test.py
+++ b/roles/openshift_health_checker/test/aos_version_test.py
@@ -18,7 +18,43 @@ expected_pkgs = {
}
-@pytest.mark.parametrize('pkgs, expect_not_found', [
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [
+ (
+ # all found
+ [Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
+ expected_pkgs,
+ ),
+ (
+ # found with more specific version
+ [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
+ expected_pkgs,
+ ),
+ (
+ [Package('ovs', '2.6'), Package('ovs', '2.4')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ),
+ (
+ [Package('ovs', '2.7')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ),
+])
+def test_check_precise_version_found(pkgs, expected_pkgs_dict):
+ aos_version._check_precise_version_found(pkgs, expected_pkgs_dict)
+
+
+@pytest.mark.parametrize('pkgs,expect_not_found', [
(
[],
{
@@ -55,14 +91,6 @@ expected_pkgs = {
}, # not the right version
),
(
- [Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
- {}, # all found
- ),
- (
- [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
- {}, # found with more specific version
- ),
- (
[Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5')],
{
"spam": {
@@ -73,64 +101,86 @@ expected_pkgs = {
}, # eggs found with multiple versions
),
])
-def test_check_pkgs_for_precise_version(pkgs, expect_not_found):
- if expect_not_found:
- with pytest.raises(aos_version.PreciseVersionNotFound) as e:
- aos_version._check_precise_version_found(pkgs, expected_pkgs)
-
- assert list(expect_not_found.values()) == e.value.problem_pkgs
- else:
+def test_check_precise_version_found_fail(pkgs, expect_not_found):
+ with pytest.raises(aos_version.PreciseVersionNotFound) as e:
aos_version._check_precise_version_found(pkgs, expected_pkgs)
+ assert list(expect_not_found.values()) == e.value.problem_pkgs
-@pytest.mark.parametrize('pkgs, expect_higher', [
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [
(
[],
- [],
+ expected_pkgs,
),
(
+ # more precise but not strictly higher
[Package('spam', '3.2.1.9')],
- [], # more precise but not strictly higher
+ expected_pkgs,
),
(
+ [Package('ovs', '2.7')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ),
+])
+def test_check_higher_version_found(pkgs, expected_pkgs_dict):
+ aos_version._check_higher_version_found(pkgs, expected_pkgs_dict)
+
+
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict,expect_higher', [
+ (
[Package('spam', '3.3')],
+ expected_pkgs,
['spam-3.3'], # lower precision, but higher
),
(
[Package('spam', '3.2.1'), Package('eggs', '3.3.2')],
+ expected_pkgs,
['eggs-3.3.2'], # one too high
),
(
[Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5'), Package('eggs', '3.4')],
+ expected_pkgs,
['eggs-3.4'], # multiple versions, one is higher
),
(
[Package('eggs', '3.2.1'), Package('eggs', '3.4'), Package('eggs', '3.3')],
+ expected_pkgs,
['eggs-3.4'], # multiple versions, two are higher
),
+ (
+ [Package('ovs', '2.8')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ['ovs-2.8'],
+ ),
])
-def test_check_pkgs_for_greater_version(pkgs, expect_higher):
- if expect_higher:
- with pytest.raises(aos_version.FoundHigherVersion) as e:
- aos_version._check_higher_version_found(pkgs, expected_pkgs)
- assert set(expect_higher) == set(e.value.problem_pkgs)
- else:
- aos_version._check_higher_version_found(pkgs, expected_pkgs)
+def test_check_higher_version_found_fail(pkgs, expected_pkgs_dict, expect_higher):
+ with pytest.raises(aos_version.FoundHigherVersion) as e:
+ aos_version._check_higher_version_found(pkgs, expected_pkgs_dict)
+ assert set(expect_higher) == set(e.value.problem_pkgs)
-@pytest.mark.parametrize('pkgs, expect_to_flag_pkgs', [
- (
- [],
- [],
- ),
- (
- [Package('spam', '3.2.1')],
- [],
- ),
- (
- [Package('spam', '3.2.1'), Package('eggs', '3.2.2')],
- [],
- ),
+@pytest.mark.parametrize('pkgs', [
+ [],
+ [Package('spam', '3.2.1')],
+ [Package('spam', '3.2.1'), Package('eggs', '3.2.2')],
+])
+def test_check_multi_minor_release(pkgs):
+ aos_version._check_multi_minor_release(pkgs, expected_pkgs)
+
+
+@pytest.mark.parametrize('pkgs,expect_to_flag_pkgs', [
(
[Package('spam', '3.2.1'), Package('spam', '3.3.2')],
['spam'],
@@ -140,10 +190,7 @@ def test_check_pkgs_for_greater_version(pkgs, expect_higher):
['eggs'],
),
])
-def test_check_pkgs_for_multi_release(pkgs, expect_to_flag_pkgs):
- if expect_to_flag_pkgs:
- with pytest.raises(aos_version.FoundMultiRelease) as e:
- aos_version._check_multi_minor_release(pkgs, expected_pkgs)
- assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs)
- else:
+def test_check_multi_minor_release_fail(pkgs, expect_to_flag_pkgs):
+ with pytest.raises(aos_version.FoundMultiRelease) as e:
aos_version._check_multi_minor_release(pkgs, expected_pkgs)
+ assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs)
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index b353fa610..e98d02c58 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -3,24 +3,21 @@ import pytest
from openshift_checks.disk_availability import DiskAvailability, OpenShiftCheckException
-@pytest.mark.parametrize('group_names,is_containerized,is_active', [
- (['masters'], False, True),
- # ensure check is skipped on containerized installs
- (['masters'], True, False),
- (['nodes'], False, True),
- (['etcd'], False, True),
- (['masters', 'nodes'], False, True),
- (['masters', 'etcd'], False, True),
- ([], False, False),
- (['lb'], False, False),
- (['nfs'], False, False),
+@pytest.mark.parametrize('group_names,is_active', [
+ (['masters'], True),
+ (['nodes'], True),
+ (['etcd'], True),
+ (['masters', 'nodes'], True),
+ (['masters', 'etcd'], True),
+ ([], False),
+ (['lb'], False),
+ (['nfs'], False),
])
-def test_is_active(group_names, is_containerized, is_active):
+def test_is_active(group_names, is_active):
task_vars = dict(
group_names=group_names,
- openshift=dict(common=dict(is_containerized=is_containerized)),
)
- assert DiskAvailability.is_active(task_vars=task_vars) == is_active
+ assert DiskAvailability(None, task_vars).is_active() == is_active
@pytest.mark.parametrize('ansible_mounts,extra_words', [
@@ -33,12 +30,11 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
group_names=['masters'],
ansible_mounts=ansible_mounts,
)
- check = DiskAvailability(execute_module=fake_execute_module)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ DiskAvailability(fake_execute_module, task_vars).run()
- for word in 'determine available disk'.split() + extra_words:
+ for word in 'determine disk availability'.split() + extra_words:
assert word in str(excinfo.value)
@@ -81,7 +77,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
[{
# not enough space on / ...
'mount': '/',
- 'size_available': 0,
+ 'size_available': 2 * 10**9,
}, {
# ... but enough on /var
'mount': '/var',
@@ -96,8 +92,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
ansible_mounts=ansible_mounts,
)
- check = DiskAvailability(execute_module=fake_execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = DiskAvailability(fake_execute_module, task_vars).run()
assert not result.get('failed', False)
@@ -171,8 +166,7 @@ def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible
ansible_mounts=ansible_mounts,
)
- check = DiskAvailability(execute_module=fake_execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = DiskAvailability(fake_execute_module, task_vars).run()
assert result['failed']
for word in 'below recommended'.split() + extra_words:
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 197c65f51..8d0a53df9 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -21,7 +21,7 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active
openshift_deployment_type=deployment_type,
group_names=group_names,
)
- assert DockerImageAvailability.is_active(task_vars=task_vars) == expect_active
+ assert DockerImageAvailability(None, task_vars).is_active() == expect_active
@pytest.mark.parametrize("is_containerized,is_atomic", [
@@ -31,18 +31,18 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active
(False, True),
])
def test_all_images_available_locally(is_containerized, is_atomic):
- def execute_module(module_name, args, task_vars):
+ def execute_module(module_name, module_args, *_):
if module_name == "yum":
return {"changed": True}
assert module_name == "docker_image_facts"
- assert 'name' in args
- assert args['name']
+ assert 'name' in module_args
+ assert module_args['name']
return {
- 'images': [args['name']],
+ 'images': [module_args['name']],
}
- result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict(
+ result = DockerImageAvailability(execute_module, task_vars=dict(
openshift=dict(
common=dict(
service_type='origin',
@@ -52,9 +52,9 @@ def test_all_images_available_locally(is_containerized, is_atomic):
docker=dict(additional_registries=["docker.io"]),
),
openshift_deployment_type='origin',
- openshift_release='v3.4',
openshift_image_tag='3.4',
- ))
+ group_names=['nodes', 'masters'],
+ )).run()
assert not result.get('failed', False)
@@ -64,12 +64,12 @@ def test_all_images_available_locally(is_containerized, is_atomic):
True,
])
def test_all_images_available_remotely(available_locally):
- def execute_module(module_name, args, task_vars):
+ def execute_module(module_name, *_):
if module_name == 'docker_image_facts':
return {'images': [], 'failed': available_locally}
return {'changed': False}
- result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict(
+ result = DockerImageAvailability(execute_module, task_vars=dict(
openshift=dict(
common=dict(
service_type='origin',
@@ -79,15 +79,15 @@ def test_all_images_available_remotely(available_locally):
docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]),
),
openshift_deployment_type='origin',
- openshift_release='3.4',
openshift_image_tag='v3.4',
- ))
+ group_names=['nodes', 'masters'],
+ )).run()
assert not result.get('failed', False)
def test_all_images_unavailable():
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, *_):
if module_name == "command":
return {
'failed': True,
@@ -97,8 +97,7 @@ def test_all_images_unavailable():
'changed': False,
}
- check = DockerImageAvailability(execute_module=execute_module)
- actual = check.run(tmp=None, task_vars=dict(
+ actual = DockerImageAvailability(execute_module, task_vars=dict(
openshift=dict(
common=dict(
service_type='origin',
@@ -108,9 +107,9 @@ def test_all_images_unavailable():
docker=dict(additional_registries=["docker.io"]),
),
openshift_deployment_type="openshift-enterprise",
- openshift_release=None,
- openshift_image_tag='latest'
- ))
+ openshift_image_tag='latest',
+ group_names=['nodes', 'masters'],
+ )).run()
assert actual['failed']
assert "required Docker images are not available" in actual['msg']
@@ -127,7 +126,7 @@ def test_all_images_unavailable():
),
])
def test_skopeo_update_failure(message, extra_words):
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, *_):
if module_name == "yum":
return {
"failed": True,
@@ -137,7 +136,7 @@ def test_skopeo_update_failure(message, extra_words):
return {'changed': False}
- actual = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict(
+ actual = DockerImageAvailability(execute_module, task_vars=dict(
openshift=dict(
common=dict(
service_type='origin',
@@ -147,9 +146,9 @@ def test_skopeo_update_failure(message, extra_words):
docker=dict(additional_registries=["unknown.io"]),
),
openshift_deployment_type="openshift-enterprise",
- openshift_release='',
openshift_image_tag='',
- ))
+ group_names=['nodes', 'masters'],
+ )).run()
assert actual["failed"]
for word in extra_words:
@@ -162,12 +161,12 @@ def test_skopeo_update_failure(message, extra_words):
("openshift-enterprise", []),
])
def test_registry_availability(deployment_type, registries):
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, *_):
return {
'changed': False,
}
- actual = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict(
+ actual = DockerImageAvailability(execute_module, task_vars=dict(
openshift=dict(
common=dict(
service_type='origin',
@@ -177,8 +176,99 @@ def test_registry_availability(deployment_type, registries):
docker=dict(additional_registries=registries),
),
openshift_deployment_type=deployment_type,
- openshift_release='',
openshift_image_tag='',
- ))
+ group_names=['nodes', 'masters'],
+ )).run()
assert not actual.get("failed", False)
+
+
+@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [
+ ( # standard set of stuff required on nodes
+ "origin", False, ['nodes'], None,
+ set([
+ 'openshift/origin-pod:vtest',
+ 'openshift/origin-deployer:vtest',
+ 'openshift/origin-docker-registry:vtest',
+ 'openshift/origin-haproxy-router:vtest',
+ 'cockpit/kubernetes', # origin version of registry-console
+ ])
+ ),
+ ( # set a different URL for images
+ "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}',
+ set([
+ 'foo.io/openshift/origin-pod:vtest',
+ 'foo.io/openshift/origin-deployer:vtest',
+ 'foo.io/openshift/origin-docker-registry:vtest',
+ 'foo.io/openshift/origin-haproxy-router:vtest',
+ 'cockpit/kubernetes', # AFAICS this is not built from the URL
+ ])
+ ),
+ (
+ "origin", True, ['nodes', 'masters', 'etcd'], None,
+ set([
+ # images running on top of openshift
+ 'openshift/origin-pod:vtest',
+ 'openshift/origin-deployer:vtest',
+ 'openshift/origin-docker-registry:vtest',
+ 'openshift/origin-haproxy-router:vtest',
+ 'cockpit/kubernetes',
+ # containerized component images
+ 'openshift/origin:vtest',
+ 'openshift/node:vtest',
+ 'openshift/openvswitch:vtest',
+ 'registry.access.redhat.com/rhel7/etcd',
+ ])
+ ),
+ ( # enterprise images
+ "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45',
+ set([
+ 'foo.io/openshift3/ose-pod:f13ac45',
+ 'foo.io/openshift3/ose-deployer:f13ac45',
+ 'foo.io/openshift3/ose-docker-registry:f13ac45',
+ 'foo.io/openshift3/ose-haproxy-router:f13ac45',
+ # registry-console is not constructed/versioned the same as the others.
+ 'registry.access.redhat.com/openshift3/registry-console',
+ # containerized images aren't built from oreg_url
+ 'openshift3/node:vtest',
+ 'openshift3/openvswitch:vtest',
+ ])
+ ),
+ (
+ "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45',
+ set([
+ 'registry.access.redhat.com/rhel7/etcd',
+ # lb does not yet come in a containerized version
+ ])
+ ),
+
+])
+def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected):
+ task_vars = dict(
+ openshift=dict(
+ common=dict(
+ is_containerized=is_containerized,
+ is_atomic=False,
+ ),
+ ),
+ openshift_deployment_type=deployment_type,
+ group_names=groups,
+ oreg_url=oreg_url,
+ openshift_image_tag='vtest',
+ )
+
+ assert expected == DockerImageAvailability("DUMMY", task_vars).required_images()
+
+
+def test_containerized_etcd():
+ task_vars = dict(
+ openshift=dict(
+ common=dict(
+ is_containerized=True,
+ ),
+ ),
+ openshift_deployment_type="origin",
+ group_names=['etcd'],
+ )
+ expected = set(['registry.access.redhat.com/rhel7/etcd'])
+ assert expected == DockerImageAvailability("DUMMY", task_vars).required_images()
diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py
index 292a323db..e0dccc062 100644
--- a/roles/openshift_health_checker/test/docker_storage_test.py
+++ b/roles/openshift_health_checker/test/docker_storage_test.py
@@ -4,12 +4,6 @@ from openshift_checks import OpenShiftCheckException
from openshift_checks.docker_storage import DockerStorage
-def dummy_check(execute_module=None):
- def dummy_exec(self, status, task_vars):
- raise Exception("dummy executor called")
- return DockerStorage(execute_module=execute_module or dummy_exec)
-
-
@pytest.mark.parametrize('is_containerized, group_names, is_active', [
(False, ["masters", "etcd"], False),
(False, ["masters", "nodes"], True),
@@ -20,10 +14,11 @@ def test_is_active(is_containerized, group_names, is_active):
openshift=dict(common=dict(is_containerized=is_containerized)),
group_names=group_names,
)
- assert DockerStorage.is_active(task_vars=task_vars) == is_active
+ assert DockerStorage(None, task_vars).is_active() == is_active
-non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
+def non_atomic_task_vars():
+ return {"openshift": {"common": {"is_atomic": False}}}
@pytest.mark.parametrize('docker_info, failed, expect_msg', [
@@ -56,7 +51,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
(
dict(info={
"Driver": "overlay2",
- "DriverStatus": []
+ "DriverStatus": [("Backing Filesystem", "xfs")],
}),
False,
[],
@@ -64,9 +59,30 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
(
dict(info={
"Driver": "overlay",
+ "DriverStatus": [("Backing Filesystem", "btrfs")],
}),
True,
- ["unsupported Docker storage driver"],
+ ["storage is type 'btrfs'", "only supported with\n'xfs'"],
+ ),
+ (
+ dict(info={
+ "Driver": "overlay2",
+ "DriverStatus": [("Backing Filesystem", "xfs")],
+ "OperatingSystem": "Red Hat Enterprise Linux Server release 7.2 (Maipo)",
+ "KernelVersion": "3.10.0-327.22.2.el7.x86_64",
+ }),
+ True,
+ ["Docker reports kernel version 3.10.0-327"],
+ ),
+ (
+ dict(info={
+ "Driver": "overlay",
+ "DriverStatus": [("Backing Filesystem", "xfs")],
+ "OperatingSystem": "CentOS",
+ "KernelVersion": "3.10.0-514",
+ }),
+ False,
+ [],
),
(
dict(info={
@@ -77,16 +93,17 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
),
])
def test_check_storage_driver(docker_info, failed, expect_msg):
- def execute_module(module_name, args, tmp=None, task_vars=None):
+ def execute_module(module_name, *_):
if module_name == "yum":
return {}
if module_name != "docker_info":
raise ValueError("not expecting module " + module_name)
return docker_info
- check = dummy_check(execute_module=execute_module)
- check._check_dm_usage = lambda status, task_vars: dict() # stub out for this test
- result = check.run(tmp=None, task_vars=non_atomic_task_vars)
+ check = DockerStorage(execute_module, non_atomic_task_vars())
+ check.check_dm_usage = lambda status: dict() # stub out for this test
+ check.check_overlay_usage = lambda info: dict() # stub out for this test
+ result = check.run()
if failed:
assert result["failed"]
@@ -145,9 +162,9 @@ not_enough_space = {
),
])
def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
- check = dummy_check()
- check._get_vg_free = lambda pool, task_vars: vg_free
- result = check._check_dm_usage(driver_status, task_vars)
+ check = DockerStorage(None, task_vars)
+ check.get_vg_free = lambda pool: vg_free
+ result = check.check_dm_usage(driver_status)
result_success = not result.get("failed")
assert result_success is success
@@ -187,18 +204,18 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
)
])
def test_vg_free(pool, command_returns, raises, returns):
- def execute_module(module_name, args, tmp=None, task_vars=None):
+ def execute_module(module_name, *_):
if module_name != "command":
raise ValueError("not expecting module " + module_name)
return command_returns
- check = dummy_check(execute_module=execute_module)
+ check = DockerStorage(execute_module)
if raises:
with pytest.raises(OpenShiftCheckException) as err:
- check._get_vg_free(pool, {})
+ check.get_vg_free(pool)
assert raises in str(err.value)
else:
- ret = check._get_vg_free(pool, {})
+ ret = check.get_vg_free(pool)
assert ret == returns
@@ -209,7 +226,7 @@ def test_vg_free(pool, command_returns, raises, returns):
("12g", 12.0 * 1024**3),
])
def test_convert_to_bytes(string, expect_bytes):
- got = DockerStorage._convert_to_bytes(string)
+ got = DockerStorage.convert_to_bytes(string)
assert got == expect_bytes
@@ -219,6 +236,70 @@ def test_convert_to_bytes(string, expect_bytes):
])
def test_convert_to_bytes_error(string):
with pytest.raises(ValueError) as err:
- DockerStorage._convert_to_bytes(string)
+ DockerStorage.convert_to_bytes(string)
assert "Cannot convert" in str(err.value)
assert string in str(err.value)
+
+
+ansible_mounts_enough = [{
+ 'mount': '/var/lib/docker',
+ 'size_available': 50 * 10**9,
+ 'size_total': 50 * 10**9,
+}]
+ansible_mounts_not_enough = [{
+ 'mount': '/var/lib/docker',
+ 'size_available': 0,
+ 'size_total': 50 * 10**9,
+}]
+ansible_mounts_missing_fields = [dict(mount='/var/lib/docker')]
+ansible_mounts_zero_size = [{
+ 'mount': '/var/lib/docker',
+ 'size_available': 0,
+ 'size_total': 0,
+}]
+
+
+@pytest.mark.parametrize('ansible_mounts, threshold, expect_fail, expect_msg', [
+ (
+ ansible_mounts_enough,
+ None,
+ False,
+ [],
+ ),
+ (
+ ansible_mounts_not_enough,
+ None,
+ True,
+ ["usage percentage", "higher than threshold"],
+ ),
+ (
+ ansible_mounts_not_enough,
+ "bogus percent",
+ True,
+ ["is not a percentage"],
+ ),
+ (
+ ansible_mounts_missing_fields,
+ None,
+ True,
+ ["Ansible bug"],
+ ),
+ (
+ ansible_mounts_zero_size,
+ None,
+ True,
+ ["Ansible bug"],
+ ),
+])
+def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg):
+ task_vars = non_atomic_task_vars()
+ task_vars["ansible_mounts"] = ansible_mounts
+ if threshold is not None:
+ task_vars["max_overlay_usage_percent"] = threshold
+ check = DockerStorage(None, task_vars)
+ docker_info = dict(DockerRootDir="/var/lib/docker", Driver="overlay")
+ result = check.check_overlay_usage(docker_info)
+
+ assert expect_fail == bool(result.get("failed"))
+ for msg in expect_msg:
+ assert msg in result["msg"]
diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py
index b9d375d8c..9edfc17c7 100644
--- a/roles/openshift_health_checker/test/elasticsearch_test.py
+++ b/roles/openshift_health_checker/test/elasticsearch_test.py
@@ -6,9 +6,9 @@ from openshift_checks.logging.elasticsearch import Elasticsearch
task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
-def canned_elasticsearch(exec_oc=None):
+def canned_elasticsearch(task_vars=None, exec_oc=None):
"""Create an Elasticsearch check object with canned exec_oc method"""
- check = Elasticsearch("dummy") # fails if a module is actually invoked
+ check = Elasticsearch("dummy", task_vars or {}) # fails if a module is actually invoked
if exec_oc:
check._exec_oc = exec_oc
return check
@@ -50,10 +50,10 @@ split_es_pod = {
def test_check_elasticsearch():
- assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([], {})
+ assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([])
# canned oc responses to match so all the checks pass
- def _exec_oc(cmd, args, task_vars):
+ def _exec_oc(cmd, args):
if '_cat/master' in cmd:
return 'name logging-es'
elif '/_nodes' in cmd:
@@ -65,7 +65,7 @@ def test_check_elasticsearch():
else:
raise Exception(cmd)
- assert not canned_elasticsearch(_exec_oc).check_elasticsearch([plain_es_pod], {})
+ assert not canned_elasticsearch({}, _exec_oc).check_elasticsearch([plain_es_pod])
def pods_by_name(pods):
@@ -88,9 +88,9 @@ def pods_by_name(pods):
])
def test_check_elasticsearch_masters(pods, expect_error):
test_pods = list(pods)
- check = canned_elasticsearch(lambda cmd, args, task_vars: test_pods.pop(0)['_test_master_name_str'])
+ check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: test_pods.pop(0)['_test_master_name_str'])
- errors = check._check_elasticsearch_masters(pods_by_name(pods), task_vars_config_base)
+ errors = check._check_elasticsearch_masters(pods_by_name(pods))
assert_error(''.join(errors), expect_error)
@@ -124,9 +124,9 @@ es_node_list = {
),
])
def test_check_elasticsearch_node_list(pods, node_list, expect_error):
- check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(node_list))
+ check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list))
- errors = check._check_elasticsearch_node_list(pods_by_name(pods), task_vars_config_base)
+ errors = check._check_elasticsearch_node_list(pods_by_name(pods))
assert_error(''.join(errors), expect_error)
@@ -149,9 +149,9 @@ def test_check_elasticsearch_node_list(pods, node_list, expect_error):
])
def test_check_elasticsearch_cluster_health(pods, health_data, expect_error):
test_health_data = list(health_data)
- check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(test_health_data.pop(0)))
+ check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(test_health_data.pop(0)))
- errors = check._check_es_cluster_health(pods_by_name(pods), task_vars_config_base)
+ errors = check._check_es_cluster_health(pods_by_name(pods))
assert_error(''.join(errors), expect_error)
@@ -174,7 +174,7 @@ def test_check_elasticsearch_cluster_health(pods, health_data, expect_error):
),
])
def test_check_elasticsearch_diskspace(disk_data, expect_error):
- check = canned_elasticsearch(lambda cmd, args, task_vars: disk_data)
+ check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: disk_data)
- errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]), task_vars_config_base)
+ errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]))
assert_error(''.join(errors), expect_error)
diff --git a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
index df9d52d41..e3d6706fa 100644
--- a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
+++ b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
@@ -51,10 +51,10 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words):
task_vars = dict(
ansible_mounts=ansible_mounts,
)
- check = EtcdImageDataSize(execute_module=fake_execute_module)
+ check = EtcdImageDataSize(fake_execute_module, task_vars)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ check.run()
for word in 'determine valid etcd mountpath'.split() + extra_words:
assert word in str(excinfo.value)
@@ -111,14 +111,14 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words):
)
])
def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size_limit, should_fail, extra_words):
- def execute_module(module_name, args, tmp=None, task_vars=None):
+ def execute_module(module_name, module_args, *_):
if module_name != "etcdkeysize":
return {
"changed": False,
}
client = fake_etcd_client(tree)
- s, limit_exceeded = check_etcd_key_size(client, tree["key"], args["size_limit_bytes"])
+ s, limit_exceeded = check_etcd_key_size(client, tree["key"], module_args["size_limit_bytes"])
return {"size_limit_exceeded": limit_exceeded}
@@ -133,7 +133,7 @@ def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size
if size_limit is None:
task_vars.pop("etcd_max_image_data_size_bytes")
- check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
+ check = EtcdImageDataSize(execute_module, task_vars).run()
if should_fail:
assert check["failed"]
@@ -267,14 +267,14 @@ def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size
),
])
def test_etcd_key_size_check_calculates_correct_size(ansible_mounts, tree, root_path, expected_size, extra_words):
- def execute_module(module_name, args, tmp=None, task_vars=None):
+ def execute_module(module_name, module_args, *_):
if module_name != "etcdkeysize":
return {
"changed": False,
}
client = fake_etcd_client(tree)
- size, limit_exceeded = check_etcd_key_size(client, root_path, args["size_limit_bytes"])
+ size, limit_exceeded = check_etcd_key_size(client, root_path, module_args["size_limit_bytes"])
assert size == expected_size
return {
@@ -289,12 +289,12 @@ def test_etcd_key_size_check_calculates_correct_size(ansible_mounts, tree, root_
)
)
- check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
+ check = EtcdImageDataSize(execute_module, task_vars).run()
assert not check.get("failed", False)
def test_etcdkeysize_module_failure():
- def execute_module(module_name, tmp=None, task_vars=None):
+ def execute_module(module_name, *_):
if module_name != "etcdkeysize":
return {
"changed": False,
@@ -317,7 +317,7 @@ def test_etcdkeysize_module_failure():
)
)
- check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
+ check = EtcdImageDataSize(execute_module, task_vars).run()
assert check["failed"]
for word in "Failed to retrieve stats":
diff --git a/roles/openshift_health_checker/test/etcd_traffic_test.py b/roles/openshift_health_checker/test/etcd_traffic_test.py
new file mode 100644
index 000000000..f4316c423
--- /dev/null
+++ b/roles/openshift_health_checker/test/etcd_traffic_test.py
@@ -0,0 +1,74 @@
+import pytest
+
+from openshift_checks.etcd_traffic import EtcdTraffic
+
+
+@pytest.mark.parametrize('group_names,version,is_active', [
+ (['masters'], "3.5", False),
+ (['masters'], "3.6", False),
+ (['nodes'], "3.4", False),
+ (['etcd'], "3.4", True),
+ (['etcd'], "3.5", True),
+ (['etcd'], "3.1", False),
+ (['masters', 'nodes'], "3.5", False),
+ (['masters', 'etcd'], "3.5", True),
+ ([], "3.4", False),
+])
+def test_is_active(group_names, version, is_active):
+ task_vars = dict(
+ group_names=group_names,
+ openshift=dict(
+ common=dict(short_version=version),
+ ),
+ )
+ assert EtcdTraffic(task_vars=task_vars).is_active() == is_active
+
+
+@pytest.mark.parametrize('group_names,matched,failed,extra_words', [
+ (["masters"], True, True, ["Higher than normal", "traffic"]),
+ (["masters", "etcd"], False, False, []),
+ (["etcd"], False, False, []),
+])
+def test_log_matches_high_traffic_msg(group_names, matched, failed, extra_words):
+ def execute_module(module_name, *_):
+ return {
+ "matched": matched,
+ "failed": failed,
+ }
+
+ task_vars = dict(
+ group_names=group_names,
+ openshift=dict(
+ common=dict(service_type="origin", is_containerized=False),
+ )
+ )
+
+ result = EtcdTraffic(execute_module, task_vars).run()
+
+ for word in extra_words:
+ assert word in result.get("msg", "")
+
+ assert result.get("failed", False) == failed
+
+
+@pytest.mark.parametrize('is_containerized,expected_unit_value', [
+ (False, "etcd"),
+ (True, "etcd_container"),
+])
+def test_systemd_unit_matches_deployment_type(is_containerized, expected_unit_value):
+ task_vars = dict(
+ openshift=dict(
+ common=dict(is_containerized=is_containerized),
+ )
+ )
+
+ def execute_module(module_name, args, *_):
+ assert module_name == "search_journalctl"
+ matchers = args["log_matchers"]
+
+ for matcher in matchers:
+ assert matcher["unit"] == expected_unit_value
+
+ return {"failed": False}
+
+ EtcdTraffic(execute_module, task_vars).run()
diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py
index 917045526..0b255136e 100644
--- a/roles/openshift_health_checker/test/etcd_volume_test.py
+++ b/roles/openshift_health_checker/test/etcd_volume_test.py
@@ -11,10 +11,9 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
task_vars = dict(
ansible_mounts=ansible_mounts,
)
- check = EtcdVolume(execute_module=fake_execute_module)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ EtcdVolume(fake_execute_module, task_vars).run()
for word in 'Unable to find etcd storage mount point'.split() + extra_words:
assert word in str(excinfo.value)
@@ -76,8 +75,7 @@ def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts):
if task_vars["etcd_device_usage_threshold_percent"] is None:
task_vars.pop("etcd_device_usage_threshold_percent")
- check = EtcdVolume(execute_module=fake_execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = EtcdVolume(fake_execute_module, task_vars).run()
assert not result.get('failed', False)
@@ -137,8 +135,7 @@ def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts,
if task_vars["etcd_device_usage_threshold_percent"] is None:
task_vars.pop("etcd_device_usage_threshold_percent")
- check = EtcdVolume(execute_module=fake_execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = EtcdVolume(fake_execute_module, task_vars).run()
assert result['failed']
for word in extra_words:
diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py
index d151c0b19..9cee57868 100644
--- a/roles/openshift_health_checker/test/fluentd_test.py
+++ b/roles/openshift_health_checker/test/fluentd_test.py
@@ -103,7 +103,7 @@ fluentd_node3_unlabeled = {
),
])
def test_get_fluentd_pods(pods, nodes, expect_error):
- check = canned_fluentd(lambda cmd, args, task_vars: json.dumps(dict(items=nodes)))
+ check = canned_fluentd(exec_oc=lambda cmd, args: json.dumps(dict(items=nodes)))
- error = check.check_fluentd(pods, {})
+ error = check.check_fluentd(pods)
assert_error(error, expect_error)
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
index 19140a1b6..3a880d300 100644
--- a/roles/openshift_health_checker/test/kibana_test.py
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -13,7 +13,7 @@ from openshift_checks.logging.kibana import Kibana
def canned_kibana(exec_oc=None):
"""Create a Kibana check object with canned exec_oc method"""
- check = Kibana("dummy") # fails if a module is actually invoked
+ check = Kibana() # fails if a module is actually invoked
if exec_oc:
check._exec_oc = exec_oc
return check
@@ -137,9 +137,9 @@ def test_check_kibana(pods, expect_error):
),
])
def test_get_kibana_url(route, expect_url, expect_error):
- check = canned_kibana(lambda cmd, args, task_vars: json.dumps(route) if route else "")
+ check = canned_kibana(exec_oc=lambda cmd, args: json.dumps(route) if route else "")
- url, error = check._get_kibana_url({})
+ url, error = check._get_kibana_url()
if expect_url:
assert url == expect_url
else:
@@ -169,10 +169,10 @@ def test_get_kibana_url(route, expect_url, expect_error):
),
])
def test_verify_url_internal_failure(exec_result, expect):
- check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result))
- check._get_kibana_url = lambda task_vars: ('url', None)
+ check = Kibana(execute_module=lambda *_: dict(failed=True, msg=exec_result))
+ check._get_kibana_url = lambda: ('url', None)
- error = check._check_kibana_route({})
+ error = check._check_kibana_route()
assert_error(error, expect)
@@ -211,8 +211,8 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch):
monkeypatch.setattr(urllib2, 'urlopen', urlopen)
check = canned_kibana()
- check._get_kibana_url = lambda task_vars: ('url', None)
- check._verify_url_internal = lambda url, task_vars: None
+ check._get_kibana_url = lambda: ('url', None)
+ check._verify_url_internal = lambda url: None
- error = check._check_kibana_route({})
+ error = check._check_kibana_route()
assert_error(error, expect)
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
index b6db34fe3..6f1697ee6 100644
--- a/roles/openshift_health_checker/test/logging_check_test.py
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -11,7 +11,7 @@ logging_namespace = "logging"
def canned_loggingcheck(exec_oc=None):
"""Create a LoggingCheck object with canned exec_oc method"""
- check = LoggingCheck("dummy") # fails if a module is actually invoked
+ check = LoggingCheck() # fails if a module is actually invoked
check.logging_namespace = 'logging'
if exec_oc:
check.exec_oc = exec_oc
@@ -50,6 +50,16 @@ plain_kibana_pod = {
}
}
+plain_kibana_pod_no_containerstatus = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-1",
+ },
+ "status": {
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+
fluentd_pod_node1 = {
"metadata": {
"labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
@@ -80,15 +90,15 @@ plain_curator_pod = {
("Permission denied", "Unexpected error using `oc`"),
])
def test_oc_failure(problem, expect):
- def execute_module(module_name, args, task_vars):
+ def execute_module(module_name, *_):
if module_name == "ocutil":
return dict(failed=True, result=problem)
return dict(changed=False)
- check = LoggingCheck({})
+ check = LoggingCheck(execute_module, task_vars_config_base)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.exec_oc(execute_module, logging_namespace, 'get foo', [], task_vars=task_vars_config_base)
+ check.exec_oc(logging_namespace, 'get foo', [])
assert expect in str(excinfo)
@@ -111,14 +121,14 @@ def test_is_active(groups, logging_deployed, is_active):
openshift_hosted_logging_deploy=logging_deployed,
)
- assert LoggingCheck.is_active(task_vars=task_vars) == is_active
+ assert LoggingCheck(None, task_vars).is_active() == is_active
@pytest.mark.parametrize('pod_output, expect_pods, expect_error', [
(
'No resources found.',
None,
- 'There are no pods in the logging namespace',
+ 'No pods were found for the "es"',
),
(
json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}),
@@ -127,11 +137,29 @@ def test_is_active(groups, logging_deployed, is_active):
),
])
def test_get_pods_for_component(pod_output, expect_pods, expect_error):
- check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: pod_output)
+ check = canned_loggingcheck(lambda namespace, cmd, args: pod_output)
pods, error = check.get_pods_for_component(
- lambda name, args, task_vars: {},
logging_namespace,
"es",
- {}
)
assert_error(error, expect_error)
+
+
+@pytest.mark.parametrize('name, pods, expected_pods', [
+ (
+ 'test single pod found, scheduled, but no containerStatuses field',
+ [plain_kibana_pod_no_containerstatus],
+ [plain_kibana_pod_no_containerstatus],
+ ),
+ (
+ 'set of pods has at least one pod with containerStatuses (scheduled); should still fail',
+ [plain_kibana_pod_no_containerstatus, plain_kibana_pod],
+ [plain_kibana_pod_no_containerstatus],
+ ),
+
+], ids=lambda argvals: argvals[0])
+def test_get_not_running_pods_no_container_status(name, pods, expected_pods):
+ check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: '')
+ result = check.not_running_pods(pods)
+
+ assert result == expected_pods
diff --git a/roles/openshift_health_checker/test/logging_index_time_test.py b/roles/openshift_health_checker/test/logging_index_time_test.py
new file mode 100644
index 000000000..178d7cd84
--- /dev/null
+++ b/roles/openshift_health_checker/test/logging_index_time_test.py
@@ -0,0 +1,170 @@
+import json
+
+import pytest
+
+from openshift_checks.logging.logging_index_time import LoggingIndexTime, OpenShiftCheckException
+
+
+SAMPLE_UUID = "unique-test-uuid"
+
+
+def canned_loggingindextime(exec_oc=None):
+ """Create a check object with a canned exec_oc method"""
+ check = LoggingIndexTime() # fails if a module is actually invoked
+ if exec_oc:
+ check.exec_oc = exec_oc
+ return check
+
+
+plain_running_elasticsearch_pod = {
+ "metadata": {
+ "labels": {"component": "es", "deploymentconfig": "logging-es-data-master"},
+ "name": "logging-es-data-master-1",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}, {"ready": True}],
+ "phase": "Running",
+ }
+}
+plain_running_kibana_pod = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-1",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}, {"ready": True}],
+ "phase": "Running",
+ }
+}
+not_running_kibana_pod = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-2",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}, {"ready": False}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ "phase": "pending",
+ }
+}
+
+
+@pytest.mark.parametrize('pods, expect_pods', [
+ (
+ [not_running_kibana_pod],
+ [],
+ ),
+ (
+ [plain_running_kibana_pod],
+ [plain_running_kibana_pod],
+ ),
+ (
+ [],
+ [],
+ )
+])
+def test_check_running_pods(pods, expect_pods):
+ check = canned_loggingindextime()
+ pods = check.running_pods(pods)
+ assert pods == expect_pods
+
+
+@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [
+ (
+ 'valid count in response',
+ {
+ "count": 1,
+ },
+ SAMPLE_UUID,
+ 0.001,
+ [],
+ ),
+], ids=lambda argval: argval[0])
+def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout, extra_words):
+ check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout)
+
+
+@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [
+ (
+ 'invalid json response',
+ {
+ "invalid_field": 1,
+ },
+ SAMPLE_UUID,
+ 0.001,
+ ["invalid response", "Elasticsearch"],
+ ),
+ (
+ 'empty response',
+ {},
+ SAMPLE_UUID,
+ 0.001,
+ ["invalid response", "Elasticsearch"],
+ ),
+ (
+ 'valid response but invalid match count',
+ {
+ "count": 0,
+ },
+ SAMPLE_UUID,
+ 0.005,
+ ["expecting match", SAMPLE_UUID, "0.005s"],
+ )
+], ids=lambda argval: argval[0])
+def test_wait_until_cmd_or_err(name, json_response, uuid, timeout, extra_words):
+ check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ with pytest.raises(OpenShiftCheckException) as error:
+ check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout)
+
+ for word in extra_words:
+ assert word in str(error)
+
+
+@pytest.mark.parametrize('name, json_response, uuid, extra_words', [
+ (
+ 'correct response code, found unique id is returned',
+ {
+ "statusCode": 404,
+ },
+ "sample unique id",
+ ["sample unique id"],
+ ),
+], ids=lambda argval: argval[0])
+def test_curl_kibana_with_uuid(name, json_response, uuid, extra_words):
+ check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ check.generate_uuid = lambda: uuid
+
+ result = check.curl_kibana_with_uuid(plain_running_kibana_pod)
+
+ for word in extra_words:
+ assert word in result
+
+
+@pytest.mark.parametrize('name, json_response, uuid, extra_words', [
+ (
+ 'invalid json response',
+ {
+ "invalid_field": "invalid",
+ },
+ SAMPLE_UUID,
+ ["invalid response returned", 'Missing "statusCode" key'],
+ ),
+ (
+ 'wrong error code in response',
+ {
+ "statusCode": 500,
+ },
+ SAMPLE_UUID,
+ ["Expecting error code", "500"],
+ ),
+], ids=lambda argval: argval[0])
+def test_failed_curl_kibana_with_uuid(name, json_response, uuid, extra_words):
+ check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ check.generate_uuid = lambda: uuid
+
+ with pytest.raises(OpenShiftCheckException) as error:
+ check.curl_kibana_with_uuid(plain_running_kibana_pod)
+
+ for word in extra_words:
+ assert word in str(error)
diff --git a/roles/openshift_health_checker/test/memory_availability_test.py b/roles/openshift_health_checker/test/memory_availability_test.py
index 4fbaea0a9..aee2f0416 100644
--- a/roles/openshift_health_checker/test/memory_availability_test.py
+++ b/roles/openshift_health_checker/test/memory_availability_test.py
@@ -17,7 +17,7 @@ def test_is_active(group_names, is_active):
task_vars = dict(
group_names=group_names,
)
- assert MemoryAvailability.is_active(task_vars=task_vars) == is_active
+ assert MemoryAvailability(None, task_vars).is_active() == is_active
@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb', [
@@ -59,8 +59,7 @@ def test_succeeds_with_recommended_memory(group_names, configured_min, ansible_m
ansible_memtotal_mb=ansible_memtotal_mb,
)
- check = MemoryAvailability(execute_module=fake_execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = MemoryAvailability(fake_execute_module, task_vars).run()
assert not result.get('failed', False)
@@ -117,8 +116,7 @@ def test_fails_with_insufficient_memory(group_names, configured_min, ansible_mem
ansible_memtotal_mb=ansible_memtotal_mb,
)
- check = MemoryAvailability(execute_module=fake_execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = MemoryAvailability(fake_execute_module, task_vars).run()
assert result.get('failed', False)
for word in 'below recommended'.split() + extra_words:
diff --git a/roles/openshift_health_checker/test/mixins_test.py b/roles/openshift_health_checker/test/mixins_test.py
index 2d83e207d..b1a41ca3c 100644
--- a/roles/openshift_health_checker/test/mixins_test.py
+++ b/roles/openshift_health_checker/test/mixins_test.py
@@ -14,10 +14,10 @@ class NotContainerizedCheck(NotContainerizedMixin, OpenShiftCheck):
(dict(openshift=dict(common=dict(is_containerized=True))), False),
])
def test_is_active(task_vars, expected):
- assert NotContainerizedCheck.is_active(task_vars) == expected
+ assert NotContainerizedCheck(None, task_vars).is_active() == expected
def test_is_active_missing_task_vars():
with pytest.raises(OpenShiftCheckException) as excinfo:
- NotContainerizedCheck.is_active(task_vars={})
+ NotContainerizedCheck().is_active()
assert 'is_containerized' in str(excinfo.value)
diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py
index e3153979c..43aa875f4 100644
--- a/roles/openshift_health_checker/test/openshift_check_test.py
+++ b/roles/openshift_health_checker/test/openshift_check_test.py
@@ -1,7 +1,7 @@
import pytest
from openshift_checks import OpenShiftCheck, OpenShiftCheckException
-from openshift_checks import load_checks, get_var
+from openshift_checks import load_checks
# Fixtures
@@ -28,34 +28,23 @@ def test_OpenShiftCheck_init():
name = "test_check"
run = NotImplemented
- # initialization requires at least one argument (apart from self)
- with pytest.raises(TypeError) as excinfo:
- TestCheck()
+ # execute_module required at init if it will be used
+ with pytest.raises(RuntimeError) as excinfo:
+ TestCheck().execute_module("foo")
assert 'execute_module' in str(excinfo.value)
- assert 'module_executor' in str(excinfo.value)
execute_module = object()
# initialize with positional argument
check = TestCheck(execute_module)
- # new recommended name
- assert check.execute_module == execute_module
- # deprecated attribute name
- assert check.module_executor == execute_module
+ assert check._execute_module == execute_module
- # initialize with keyword argument, recommended name
+ # initialize with keyword argument
check = TestCheck(execute_module=execute_module)
- # new recommended name
- assert check.execute_module == execute_module
- # deprecated attribute name
- assert check.module_executor == execute_module
+ assert check._execute_module == execute_module
- # initialize with keyword argument, deprecated name
- check = TestCheck(module_executor=execute_module)
- # new recommended name
- assert check.execute_module == execute_module
- # deprecated attribute name
- assert check.module_executor == execute_module
+ assert check.task_vars == {}
+ assert check.tmp is None
def test_subclasses():
@@ -81,19 +70,27 @@ def test_load_checks():
assert modules
+def dummy_check(task_vars):
+ class TestCheck(OpenShiftCheck):
+ name = "dummy"
+ run = NotImplemented
+
+ return TestCheck(task_vars=task_vars)
+
+
@pytest.mark.parametrize("keys,expected", [
(("foo",), 42),
(("bar", "baz"), "openshift"),
])
def test_get_var_ok(task_vars, keys, expected):
- assert get_var(task_vars, *keys) == expected
+ assert dummy_check(task_vars).get_var(*keys) == expected
def test_get_var_error(task_vars, missing_keys):
with pytest.raises(OpenShiftCheckException):
- get_var(task_vars, *missing_keys)
+ dummy_check(task_vars).get_var(*missing_keys)
def test_get_var_default(task_vars, missing_keys):
default = object()
- assert get_var(task_vars, *missing_keys, default=default) == default
+ assert dummy_check(task_vars).get_var(*missing_keys, default=default) == default
diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py
index 6494e1c06..b6acef5a6 100644
--- a/roles/openshift_health_checker/test/ovs_version_test.py
+++ b/roles/openshift_health_checker/test/ovs_version_test.py
@@ -4,7 +4,7 @@ from openshift_checks.ovs_version import OvsVersion, OpenShiftCheckException
def test_openshift_version_not_supported():
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(*_):
return {}
openshift_release = '111.7.0'
@@ -16,15 +16,14 @@ def test_openshift_version_not_supported():
openshift_deployment_type='origin',
)
- check = OvsVersion(execute_module=execute_module)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ OvsVersion(execute_module, task_vars).run()
assert "no recommended version of Open vSwitch" in str(excinfo.value)
def test_invalid_openshift_release_format():
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(*_):
return {}
task_vars = dict(
@@ -33,9 +32,8 @@ def test_invalid_openshift_release_format():
openshift_deployment_type='origin',
)
- check = OvsVersion(execute_module=execute_module)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ OvsVersion(execute_module, task_vars).run()
assert "invalid version" in str(excinfo.value)
@@ -54,7 +52,7 @@ def test_ovs_package_version(openshift_release, expected_ovs_version):
)
return_value = object()
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'rpm_version'
assert "package_list" in module_args
@@ -64,8 +62,7 @@ def test_ovs_package_version(openshift_release, expected_ovs_version):
return return_value
- check = OvsVersion(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = OvsVersion(execute_module, task_vars).run()
assert result is return_value
@@ -86,4 +83,4 @@ def test_ovs_version_skip_when_not_master_nor_node(group_names, is_containerized
group_names=group_names,
openshift=dict(common=dict(is_containerized=is_containerized)),
)
- assert OvsVersion.is_active(task_vars=task_vars) == is_active
+ assert OvsVersion(None, task_vars).is_active() == is_active
diff --git a/roles/openshift_health_checker/test/package_availability_test.py b/roles/openshift_health_checker/test/package_availability_test.py
index f7e916a46..1fe648b75 100644
--- a/roles/openshift_health_checker/test/package_availability_test.py
+++ b/roles/openshift_health_checker/test/package_availability_test.py
@@ -14,7 +14,7 @@ def test_is_active(pkg_mgr, is_containerized, is_active):
ansible_pkg_mgr=pkg_mgr,
openshift=dict(common=dict(is_containerized=is_containerized)),
)
- assert PackageAvailability.is_active(task_vars=task_vars) == is_active
+ assert PackageAvailability(None, task_vars).is_active() == is_active
@pytest.mark.parametrize('task_vars,must_have_packages,must_not_have_packages', [
@@ -51,13 +51,12 @@ def test_is_active(pkg_mgr, is_containerized, is_active):
def test_package_availability(task_vars, must_have_packages, must_not_have_packages):
return_value = object()
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'check_yum_update'
assert 'packages' in module_args
assert set(module_args['packages']).issuperset(must_have_packages)
assert not set(module_args['packages']).intersection(must_not_have_packages)
return return_value
- check = PackageAvailability(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ result = PackageAvailability(execute_module, task_vars).run()
assert result is return_value
diff --git a/roles/openshift_health_checker/test/package_update_test.py b/roles/openshift_health_checker/test/package_update_test.py
index 5e000cff5..06489b0d7 100644
--- a/roles/openshift_health_checker/test/package_update_test.py
+++ b/roles/openshift_health_checker/test/package_update_test.py
@@ -4,13 +4,12 @@ from openshift_checks.package_update import PackageUpdate
def test_package_update():
return_value = object()
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'check_yum_update'
assert 'packages' in module_args
# empty list of packages means "generic check if 'yum update' will work"
assert module_args['packages'] == []
return return_value
- check = PackageUpdate(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=None)
+ result = PackageUpdate(execute_module).run()
assert result is return_value
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index 91eace512..1ddb9cecb 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -8,7 +8,7 @@ from openshift_checks.package_version import PackageVersion, OpenShiftCheckExcep
('0.0.0', ["no recommended version of Docker"]),
])
def test_openshift_version_not_supported(openshift_release, extra_words):
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(*_):
return {}
task_vars = dict(
@@ -18,16 +18,16 @@ def test_openshift_version_not_supported(openshift_release, extra_words):
openshift_deployment_type='origin',
)
- check = PackageVersion(execute_module=execute_module)
+ check = PackageVersion(execute_module, task_vars)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ check.run()
for word in extra_words:
assert word in str(excinfo.value)
def test_invalid_openshift_release_format():
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(*_):
return {}
task_vars = dict(
@@ -36,9 +36,9 @@ def test_invalid_openshift_release_format():
openshift_deployment_type='origin',
)
- check = PackageVersion(execute_module=execute_module)
+ check = PackageVersion(execute_module, task_vars)
with pytest.raises(OpenShiftCheckException) as excinfo:
- check.run(tmp=None, task_vars=task_vars)
+ check.run()
assert "invalid version" in str(excinfo.value)
@@ -57,7 +57,7 @@ def test_package_version(openshift_release):
)
return_value = object()
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *_):
assert module_name == 'aos_version'
assert "package_list" in module_args
@@ -67,38 +67,8 @@ def test_package_version(openshift_release):
return return_value
- check = PackageVersion(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
- assert result is return_value
-
-
-@pytest.mark.parametrize('deployment_type,openshift_release,expected_ovs_version', [
- ("openshift-enterprise", "3.5", "2.6"),
- ("origin", "3.6", "2.6"),
- ("openshift-enterprise", "3.4", "2.4"),
- ("origin", "3.3", "2.4"),
-])
-def test_ovs_package_version(deployment_type, openshift_release, expected_ovs_version):
- task_vars = dict(
- openshift=dict(common=dict(service_type='origin')),
- openshift_release=openshift_release,
- openshift_image_tag='v' + openshift_release,
- openshift_deployment_type=deployment_type,
- )
- return_value = object()
-
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
- assert module_name == 'aos_version'
- assert "package_list" in module_args
-
- for pkg in module_args["package_list"]:
- if pkg["name"] == "openvswitch":
- assert pkg["version"] == expected_ovs_version
-
- return return_value
-
- check = PackageVersion(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ check = PackageVersion(execute_module, task_vars)
+ result = check.run()
assert result is return_value
@@ -119,7 +89,7 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc
)
return_value = object()
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'aos_version'
assert "package_list" in module_args
@@ -129,8 +99,8 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc
return return_value
- check = PackageVersion(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
+ check = PackageVersion(execute_module, task_vars)
+ result = check.run()
assert result is return_value
@@ -151,4 +121,4 @@ def test_package_version_skip_when_not_master_nor_node(group_names, is_container
group_names=group_names,
openshift=dict(common=dict(is_containerized=is_containerized)),
)
- assert PackageVersion.is_active(task_vars=task_vars) == is_active
+ assert PackageVersion(None, task_vars).is_active() == is_active
diff --git a/roles/openshift_health_checker/test/search_journalctl_test.py b/roles/openshift_health_checker/test/search_journalctl_test.py
new file mode 100644
index 000000000..724928aa1
--- /dev/null
+++ b/roles/openshift_health_checker/test/search_journalctl_test.py
@@ -0,0 +1,157 @@
+import pytest
+import search_journalctl
+
+
+def canned_search_journalctl(get_log_output=None):
+ """Create a search_journalctl object with canned get_log_output method"""
+ module = search_journalctl
+ if get_log_output:
+ module.get_log_output = get_log_output
+ return module
+
+
+DEFAULT_TIMESTAMP = 1496341364
+
+
+def get_timestamp(modifier=0):
+ return DEFAULT_TIMESTAMP + modifier
+
+
+def get_timestamp_microseconds(modifier=0):
+ return get_timestamp(modifier) * 1000000
+
+
+def create_test_log_object(stamp, msg):
+ return '{{"__REALTIME_TIMESTAMP": "{}", "MESSAGE": "{}"}}'.format(stamp, msg)
+
+
+@pytest.mark.parametrize('name,matchers,log_input,expected_matches,expected_errors', [
+ (
+ 'test with valid params',
+ [
+ {
+ "start_regexp": r"Sample Logs Beginning",
+ "regexp": r"test log message",
+ "unit": "test",
+ },
+ ],
+ [
+ create_test_log_object(get_timestamp_microseconds(), "test log message"),
+ create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+ ],
+ ["test log message"],
+ [],
+ ),
+ (
+ 'test with invalid json in log input',
+ [
+ {
+ "start_regexp": r"Sample Logs Beginning",
+ "regexp": r"test log message",
+ "unit": "test-unit",
+ },
+ ],
+ [
+ '{__REALTIME_TIMESTAMP: ' + str(get_timestamp_microseconds()) + ', "MESSAGE": "test log message"}',
+ ],
+ [],
+ [
+ ["invalid json", "test-unit", "test log message"],
+ ],
+ ),
+ (
+ 'test with invalid regexp',
+ [
+ {
+ "start_regexp": r"Sample Logs Beginning",
+ "regexp": r"test [ log message",
+ "unit": "test",
+ },
+ ],
+ [
+ create_test_log_object(get_timestamp_microseconds(), "test log message"),
+ create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+ create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+ create_test_log_object(get_timestamp_microseconds(), "dummy log message"),
+ create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+ ],
+ [],
+ [
+ ["invalid regular expression"],
+ ],
+ ),
+], ids=lambda argval: argval[0])
+def test_get_log_matches(name, matchers, log_input, expected_matches, expected_errors):
+ def get_log_output(matcher):
+ return log_input
+
+ module = canned_search_journalctl(get_log_output)
+ matched_regexp, errors = module.get_log_matches(matchers, 500, 60 * 60)
+
+ assert set(matched_regexp) == set(expected_matches)
+ assert len(expected_errors) == len(errors)
+
+ for idx, partial_err_set in enumerate(expected_errors):
+ for partial_err_msg in partial_err_set:
+ assert partial_err_msg in errors[idx]
+
+
+@pytest.mark.parametrize('name,matcher,log_count_lim,stamp_lim_seconds,log_input,expected_match', [
+ (
+ 'test with matching log message, but out of bounds of log_count_lim',
+ {
+ "start_regexp": r"Sample Logs Beginning",
+ "regexp": r"dummy log message",
+ "unit": "test",
+ },
+ 3,
+ get_timestamp(-100 * 60 * 60),
+ [
+ create_test_log_object(get_timestamp_microseconds(), "test log message"),
+ create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+ create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+ create_test_log_object(get_timestamp_microseconds(), "dummy log message"),
+ create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+ ],
+ None,
+ ),
+ (
+ 'test with matching log message, but with timestamp too old',
+ {
+ "start_regexp": r"Sample Logs Beginning",
+ "regexp": r"dummy log message",
+ "unit": "test",
+ },
+ 100,
+ get_timestamp(-10),
+ [
+ create_test_log_object(get_timestamp_microseconds(), "test log message"),
+ create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+ create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+ create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+ create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"),
+ ],
+ None,
+ ),
+ (
+ 'test with matching log message, and timestamp within time limit',
+ {
+ "start_regexp": r"Sample Logs Beginning",
+ "regexp": r"dummy log message",
+ "unit": "test",
+ },
+ 100,
+ get_timestamp(-1010),
+ [
+ create_test_log_object(get_timestamp_microseconds(), "test log message"),
+ create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+ create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+ create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+ create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"),
+ ],
+ create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+ ),
+], ids=lambda argval: argval[0])
+def test_find_matches_skips_logs(name, matcher, log_count_lim, stamp_lim_seconds, log_input, expected_match):
+ match = search_journalctl.find_matches(log_input, matcher, log_count_lim, stamp_lim_seconds)
+ assert match == expected_match