diff options
Diffstat (limited to 'roles/openshift_health_checker')
12 files changed, 899 insertions, 14 deletions
diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 208e81048..7bce7f107 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -1,4 +1,3 @@ -# vim: expandtab:tabstop=4:shiftwidth=4 ''' Ansible callback plugin. ''' diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py index a46589443..4460ec324 100755 --- a/roles/openshift_health_checker/library/aos_version.py +++ b/roles/openshift_health_checker/library/aos_version.py @@ -1,5 +1,4 @@ #!/usr/bin/python -# vim: expandtab:tabstop=4:shiftwidth=4 ''' Ansible module for yum-based systems determining if multiple releases of an OpenShift package are available, and if the release requested diff --git a/roles/openshift_health_checker/library/check_yum_update.py b/roles/openshift_health_checker/library/check_yum_update.py index 630ebc848..433795b67 100755 --- a/roles/openshift_health_checker/library/check_yum_update.py +++ b/roles/openshift_health_checker/library/check_yum_update.py @@ -1,5 +1,4 @@ #!/usr/bin/python -# vim: expandtab:tabstop=4:shiftwidth=4 ''' Ansible module to test whether a yum update or install will succeed, without actually performing it or running yum. diff --git a/roles/openshift_health_checker/library/etcdkeysize.py b/roles/openshift_health_checker/library/etcdkeysize.py new file mode 100644 index 000000000..620e82d87 --- /dev/null +++ b/roles/openshift_health_checker/library/etcdkeysize.py @@ -0,0 +1,122 @@ +#!/usr/bin/python +"""Ansible module that recursively determines if the size of a key in an etcd cluster exceeds a given limit.""" + +from ansible.module_utils.basic import AnsibleModule + + +try: + import etcd + + IMPORT_EXCEPTION_MSG = None +except ImportError as err: + IMPORT_EXCEPTION_MSG = str(err) + + from collections import namedtuple + EtcdMock = namedtuple("etcd", ["EtcdKeyNotFound"]) + etcd = EtcdMock(KeyError) + + +# pylint: disable=too-many-arguments +def check_etcd_key_size(client, key, size_limit, total_size=0, depth=0, depth_limit=1000, visited=None): + """Check size of an etcd path starting at given key. Returns tuple (string, bool)""" + if visited is None: + visited = set() + + if key in visited: + return 0, False + + visited.add(key) + + try: + result = client.read(key, recursive=False) + except etcd.EtcdKeyNotFound: + return 0, False + + size = 0 + limit_exceeded = False + + for node in result.leaves: + if depth >= depth_limit: + raise Exception("Maximum recursive stack depth ({}) exceeded.".format(depth_limit)) + + if size_limit and total_size + size > size_limit: + return size, True + + if not node.dir: + size += len(node.value) + continue + + key_size, limit_exceeded = check_etcd_key_size(client, node.key, + size_limit, + total_size + size, + depth + 1, + depth_limit, visited) + size += key_size + + max_limit_exceeded = limit_exceeded or (total_size + size > size_limit) + return size, max_limit_exceeded + + +def main(): # pylint: disable=missing-docstring,too-many-branches + module = AnsibleModule( + argument_spec=dict( + size_limit_bytes=dict(type="int", default=0), + paths=dict(type="list", default=["/openshift.io/images"]), + host=dict(type="str", default="127.0.0.1"), + port=dict(type="int", default=4001), + protocol=dict(type="str", default="http"), + version_prefix=dict(type="str", default=""), + allow_redirect=dict(type="bool", default=False), + cert=dict(type="dict", default=""), + ca_cert=dict(type="str", default=None), + ), + supports_check_mode=True + ) + + module.params["cert"] = ( + module.params["cert"]["cert"], + module.params["cert"]["key"], + ) + + size_limit = module.params.pop("size_limit_bytes") + paths = module.params.pop("paths") + + limit_exceeded = False + + try: + # pylint: disable=no-member + client = etcd.Client(**module.params) + except AttributeError as attrerr: + msg = str(attrerr) + if IMPORT_EXCEPTION_MSG: + msg = IMPORT_EXCEPTION_MSG + if "No module named etcd" in IMPORT_EXCEPTION_MSG: + # pylint: disable=redefined-variable-type + msg = ('Unable to import the python "etcd" dependency. ' + 'Make sure python-etcd is installed on the host.') + + module.exit_json( + failed=True, + changed=False, + size_limit_exceeded=limit_exceeded, + msg=msg, + ) + + return + + size = 0 + for path in paths: + path_size, limit_exceeded = check_etcd_key_size(client, path, size_limit - size) + size += path_size + + if limit_exceeded: + break + + module.exit_json( + changed=False, + size_limit_exceeded=limit_exceeded, + ) + + +if __name__ == '__main__': + main() diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py new file mode 100644 index 000000000..c2792a0fe --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -0,0 +1,65 @@ +# pylint: disable=missing-docstring +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks.mixins import NotContainerizedMixin + + +class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): + """Check that recommended disk space is available before a first-time install.""" + + name = "disk_availability" + tags = ["preflight"] + + # Values taken from the official installation documentation: + # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements + recommended_disk_space_bytes = { + "masters": 40 * 10**9, + "nodes": 15 * 10**9, + "etcd": 20 * 10**9, + } + + @classmethod + def is_active(cls, task_vars): + """Skip hosts that do not have recommended disk space requirements.""" + group_names = get_var(task_vars, "group_names", default=[]) + has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes)) + return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation + + def run(self, tmp, task_vars): + group_names = get_var(task_vars, "group_names") + ansible_mounts = get_var(task_vars, "ansible_mounts") + + min_free_bytes = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) + free_bytes = self.openshift_available_disk(ansible_mounts) + + if free_bytes < min_free_bytes: + return { + 'failed': True, + 'msg': ( + 'Available disk space ({:.1f} GB) for the volume containing ' + '"/var" is below minimum recommended space ({:.1f} GB)' + ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9) + } + + return {} + + @staticmethod + def openshift_available_disk(ansible_mounts): + """Determine the available disk space for an OpenShift installation. + + ansible_mounts should be a list of dicts like the 'setup' Ansible module + returns. + """ + # priority list in descending order + supported_mnt_paths = ["/var", "/"] + available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + + try: + for path in supported_mnt_paths: + if path in available_mnts: + return available_mnts[path]["size_available"] + except KeyError: + pass + + paths = ''.join(sorted(available_mnts)) or 'none' + msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths) + raise OpenShiftCheckException(msg) diff --git a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py new file mode 100644 index 000000000..c04a69765 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py @@ -0,0 +1,84 @@ +""" +Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster. +""" + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var + + +class EtcdImageDataSize(OpenShiftCheck): + """Check that total size of OpenShift image data does not exceed the recommended limit in an etcd cluster""" + + name = "etcd_imagedata_size" + tags = ["etcd"] + + def run(self, tmp, task_vars): + etcd_mountpath = self._get_etcd_mountpath(get_var(task_vars, "ansible_mounts")) + etcd_avail_diskspace = etcd_mountpath["size_available"] + etcd_total_diskspace = etcd_mountpath["size_total"] + + etcd_imagedata_size_limit = get_var(task_vars, + "etcd_max_image_data_size_bytes", + default=int(0.5 * float(etcd_total_diskspace - etcd_avail_diskspace))) + + etcd_is_ssl = get_var(task_vars, "openshift", "master", "etcd_use_ssl", default=False) + etcd_port = get_var(task_vars, "openshift", "master", "etcd_port", default=2379) + etcd_hosts = get_var(task_vars, "openshift", "master", "etcd_hosts") + + config_base = get_var(task_vars, "openshift", "common", "config_base") + + cert = task_vars.get("etcd_client_cert", config_base + "/master/master.etcd-client.crt") + key = task_vars.get("etcd_client_key", config_base + "/master/master.etcd-client.key") + ca_cert = task_vars.get("etcd_client_ca_cert", config_base + "/master/master.etcd-ca.crt") + + for etcd_host in list(etcd_hosts): + args = { + "size_limit_bytes": etcd_imagedata_size_limit, + "paths": ["/openshift.io/images", "/openshift.io/imagestreams"], + "host": etcd_host, + "port": etcd_port, + "protocol": "https" if etcd_is_ssl else "http", + "version_prefix": "/v2", + "allow_redirect": True, + "ca_cert": ca_cert, + "cert": { + "cert": cert, + "key": key, + }, + } + + etcdkeysize = self.module_executor("etcdkeysize", args, task_vars) + + if etcdkeysize.get("rc", 0) != 0 or etcdkeysize.get("failed"): + msg = 'Failed to retrieve stats for etcd host "{host}": {reason}' + reason = etcdkeysize.get("msg") + if etcdkeysize.get("module_stderr"): + reason = etcdkeysize["module_stderr"] + + msg = msg.format(host=etcd_host, reason=reason) + return {"failed": True, "changed": False, "msg": msg} + + if etcdkeysize["size_limit_exceeded"]: + limit = self._to_gigabytes(etcd_imagedata_size_limit) + msg = ("The size of OpenShift image data stored in etcd host " + "\"{host}\" exceeds the maximum recommended limit of {limit:.2f} GB. " + "Use the `oadm prune images` command to cleanup unused Docker images.") + return {"failed": True, "msg": msg.format(host=etcd_host, limit=limit)} + + return {"changed": False} + + @staticmethod + def _get_etcd_mountpath(ansible_mounts): + valid_etcd_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] + + mount_for_path = {mnt.get("mount"): mnt for mnt in ansible_mounts} + for path in valid_etcd_mount_paths: + if path in mount_for_path: + return mount_for_path[path] + + paths = ', '.join(sorted(mount_for_path)) or 'none' + msg = "Unable to determine a valid etcd mountpath. Paths mounted: {}.".format(paths) + raise OpenShiftCheckException(msg) + + @staticmethod + def _to_gigabytes(byte_size): + return float(byte_size) / 10.0**9 diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py new file mode 100644 index 000000000..28805dc37 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py @@ -0,0 +1,44 @@ +# pylint: disable=missing-docstring +from openshift_checks import OpenShiftCheck, get_var + + +class MemoryAvailability(OpenShiftCheck): + """Check that recommended memory is available.""" + + name = "memory_availability" + tags = ["preflight"] + + # Values taken from the official installation documentation: + # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements + recommended_memory_bytes = { + "masters": 16 * 10**9, + "nodes": 8 * 10**9, + "etcd": 20 * 10**9, + } + + @classmethod + def is_active(cls, task_vars): + """Skip hosts that do not have recommended memory requirements.""" + group_names = get_var(task_vars, "group_names", default=[]) + has_memory_recommendation = bool(set(group_names).intersection(cls.recommended_memory_bytes)) + return super(MemoryAvailability, cls).is_active(task_vars) and has_memory_recommendation + + def run(self, tmp, task_vars): + group_names = get_var(task_vars, "group_names") + total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * 10**6 + + min_memory_bytes = max(self.recommended_memory_bytes.get(name, 0) for name in group_names) + + if total_memory_bytes < min_memory_bytes: + return { + 'failed': True, + 'msg': ( + 'Available memory ({available:.1f} GB) ' + 'below recommended value ({recommended:.1f} GB)' + ).format( + available=float(total_memory_bytes) / 10**9, + recommended=float(min_memory_bytes) / 10**9, + ), + } + + return {} diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 657e15160..20d160eaf 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -1,4 +1,8 @@ -# pylint: disable=missing-docstring +# pylint: disable=missing-docstring,too-few-public-methods +""" +Mixin classes meant to be used with subclasses of OpenShiftCheck. +""" + from openshift_checks import get_var @@ -7,12 +11,5 @@ class NotContainerizedMixin(object): @classmethod def is_active(cls, task_vars): - return ( - # This mixin is meant to be used with subclasses of OpenShiftCheck. - super(NotContainerizedMixin, cls).is_active(task_vars) and - not cls.is_containerized(task_vars) - ) - - @staticmethod - def is_containerized(task_vars): - return get_var(task_vars, "openshift", "common", "is_containerized") + is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") + return super(NotContainerizedMixin, cls).is_active(task_vars) and not is_containerized diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index a877246f4..2693ae37b 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -1,5 +1,7 @@ import pytest +from ansible.playbook.play_context import PlayContext + from openshift_health_check import ActionModule, resolve_checks from openshift_checks import OpenShiftCheckException @@ -34,7 +36,7 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru @pytest.fixture def plugin(): task = FakeTask('openshift_health_check', {'checks': ['fake_check']}) - plugin = ActionModule(task, None, None, None, None, None) + plugin = ActionModule(task, None, PlayContext(), None, None, None) return plugin diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py new file mode 100644 index 000000000..970b474d7 --- /dev/null +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -0,0 +1,155 @@ +import pytest + +from openshift_checks.disk_availability import DiskAvailability, OpenShiftCheckException + + +@pytest.mark.parametrize('group_names,is_containerized,is_active', [ + (['masters'], False, True), + # ensure check is skipped on containerized installs + (['masters'], True, False), + (['nodes'], False, True), + (['etcd'], False, True), + (['masters', 'nodes'], False, True), + (['masters', 'etcd'], False, True), + ([], False, False), + (['lb'], False, False), + (['nfs'], False, False), +]) +def test_is_active(group_names, is_containerized, is_active): + task_vars = dict( + group_names=group_names, + openshift=dict(common=dict(is_containerized=is_containerized)), + ) + assert DiskAvailability.is_active(task_vars=task_vars) == is_active + + +@pytest.mark.parametrize('ansible_mounts,extra_words', [ + ([], ['none']), # empty ansible_mounts + ([{'mount': '/mnt'}], ['/mnt']), # missing relevant mount paths + ([{'mount': '/var'}], ['/var']), # missing size_available +]) +def test_cannot_determine_available_disk(ansible_mounts, extra_words): + task_vars = dict( + group_names=['masters'], + ansible_mounts=ansible_mounts, + ) + check = DiskAvailability(execute_module=fake_execute_module) + + with pytest.raises(OpenShiftCheckException) as excinfo: + check.run(tmp=None, task_vars=task_vars) + + for word in 'determine available disk'.split() + extra_words: + assert word in str(excinfo.value) + + +@pytest.mark.parametrize('group_names,ansible_mounts', [ + ( + ['masters'], + [{ + 'mount': '/', + 'size_available': 40 * 10**9 + 1, + }], + ), + ( + ['nodes'], + [{ + 'mount': '/', + 'size_available': 15 * 10**9 + 1, + }], + ), + ( + ['etcd'], + [{ + 'mount': '/', + 'size_available': 20 * 10**9 + 1, + }], + ), + ( + ['etcd'], + [{ + # not enough space on / ... + 'mount': '/', + 'size_available': 0, + }, { + # ... but enough on /var + 'mount': '/var', + 'size_available': 20 * 10**9 + 1, + }], + ), +]) +def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): + task_vars = dict( + group_names=group_names, + ansible_mounts=ansible_mounts, + ) + + check = DiskAvailability(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert not result.get('failed', False) + + +@pytest.mark.parametrize('group_names,ansible_mounts,extra_words', [ + ( + ['masters'], + [{ + 'mount': '/', + 'size_available': 1, + }], + ['0.0 GB'], + ), + ( + ['nodes'], + [{ + 'mount': '/', + 'size_available': 1 * 10**9, + }], + ['1.0 GB'], + ), + ( + ['etcd'], + [{ + 'mount': '/', + 'size_available': 1, + }], + ['0.0 GB'], + ), + ( + ['nodes', 'masters'], + [{ + 'mount': '/', + # enough space for a node, not enough for a master + 'size_available': 15 * 10**9 + 1, + }], + ['15.0 GB'], + ), + ( + ['etcd'], + [{ + # enough space on / ... + 'mount': '/', + 'size_available': 20 * 10**9 + 1, + }, { + # .. but not enough on /var + 'mount': '/var', + 'size_available': 0, + }], + ['0.0 GB'], + ), +]) +def test_fails_with_insufficient_disk_space(group_names, ansible_mounts, extra_words): + task_vars = dict( + group_names=group_names, + ansible_mounts=ansible_mounts, + ) + + check = DiskAvailability(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert result['failed'] + for word in 'below recommended'.split() + extra_words: + assert word in result['msg'] + + +def fake_execute_module(*args): + raise AssertionError('this function should not be called') diff --git a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py new file mode 100644 index 000000000..df9d52d41 --- /dev/null +++ b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py @@ -0,0 +1,328 @@ +import pytest + +from collections import namedtuple +from openshift_checks.etcd_imagedata_size import EtcdImageDataSize, OpenShiftCheckException +from etcdkeysize import check_etcd_key_size + + +def fake_etcd_client(root): + fake_nodes = dict() + fake_etcd_node(root, fake_nodes) + + clientclass = namedtuple("client", ["read"]) + return clientclass(lambda key, recursive: fake_etcd_result(fake_nodes[key])) + + +def fake_etcd_result(fake_node): + resultclass = namedtuple("result", ["leaves"]) + if not fake_node.dir: + return resultclass([fake_node]) + + return resultclass(fake_node.leaves) + + +def fake_etcd_node(node, visited): + min_req_fields = ["dir", "key"] + fields = list(node) + leaves = [] + + if node["dir"] and node.get("leaves"): + for leaf in node["leaves"]: + leaves.append(fake_etcd_node(leaf, visited)) + + if len(set(min_req_fields) - set(fields)) > 0: + raise ValueError("fake etcd nodes require at least {} fields.".format(min_req_fields)) + + if node.get("leaves"): + node["leaves"] = leaves + + nodeclass = namedtuple("node", fields) + nodeinst = nodeclass(**node) + visited[nodeinst.key] = nodeinst + + return nodeinst + + +@pytest.mark.parametrize('ansible_mounts,extra_words', [ + ([], ['none']), # empty ansible_mounts + ([{'mount': '/mnt'}], ['/mnt']), # missing relevant mount paths +]) +def test_cannot_determine_available_mountpath(ansible_mounts, extra_words): + task_vars = dict( + ansible_mounts=ansible_mounts, + ) + check = EtcdImageDataSize(execute_module=fake_execute_module) + + with pytest.raises(OpenShiftCheckException) as excinfo: + check.run(tmp=None, task_vars=task_vars) + + for word in 'determine valid etcd mountpath'.split() + extra_words: + assert word in str(excinfo.value) + + +@pytest.mark.parametrize('ansible_mounts,tree,size_limit,should_fail,extra_words', [ + ( + # test that default image size limit evals to 1/2 * (total size in use) + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9, + }], + {"dir": False, "key": "/", "value": "1234"}, + None, + False, + [], + ), + ( + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 48 * 10**9, + }], + {"dir": False, "key": "/", "value": "1234"}, + None, + False, + [], + ), + ( + # set max size limit for image data to be below total node value + # total node value is defined as the sum of the value field + # from every node + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 48 * 10**9, + }], + {"dir": False, "key": "/", "value": "12345678"}, + 7, + True, + ["exceeds the maximum recommended limit", "0.00 GB"], + ), + ( + [{ + 'mount': '/', + 'size_available': 48 * 10**9 - 1, + 'size_total': 48 * 10**9, + }], + {"dir": False, "key": "/", "value": "1234"}, + None, + True, + ["exceeds the maximum recommended limit", "0.00 GB"], + ) +]) +def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size_limit, should_fail, extra_words): + def execute_module(module_name, args, tmp=None, task_vars=None): + if module_name != "etcdkeysize": + return { + "changed": False, + } + + client = fake_etcd_client(tree) + s, limit_exceeded = check_etcd_key_size(client, tree["key"], args["size_limit_bytes"]) + + return {"size_limit_exceeded": limit_exceeded} + + task_vars = dict( + etcd_max_image_data_size_bytes=size_limit, + ansible_mounts=ansible_mounts, + openshift=dict( + master=dict(etcd_hosts=["localhost"]), + common=dict(config_base="/var/lib/origin") + ) + ) + if size_limit is None: + task_vars.pop("etcd_max_image_data_size_bytes") + + check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars) + + if should_fail: + assert check["failed"] + + for word in extra_words: + assert word in check["msg"] + else: + assert not check.get("failed", False) + + +@pytest.mark.parametrize('ansible_mounts,tree,root_path,expected_size,extra_words', [ + ( + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9, + }], + # test recursive size check on tree with height > 1 + { + "dir": True, + "key": "/", + "leaves": [ + {"dir": False, "key": "/foo1", "value": "1234"}, + {"dir": False, "key": "/foo2", "value": "1234"}, + {"dir": False, "key": "/foo3", "value": "1234"}, + {"dir": False, "key": "/foo4", "value": "1234"}, + { + "dir": True, + "key": "/foo5", + "leaves": [ + {"dir": False, "key": "/foo/bar1", "value": "56789"}, + {"dir": False, "key": "/foo/bar2", "value": "56789"}, + {"dir": False, "key": "/foo/bar3", "value": "56789"}, + { + "dir": True, + "key": "/foo/bar4", + "leaves": [ + {"dir": False, "key": "/foo/bar/baz1", "value": "123"}, + {"dir": False, "key": "/foo/bar/baz2", "value": "123"}, + ] + }, + ] + }, + ] + }, + "/", + 37, + [], + ), + ( + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9, + }], + # test correct sub-tree size calculation + { + "dir": True, + "key": "/", + "leaves": [ + {"dir": False, "key": "/foo1", "value": "1234"}, + {"dir": False, "key": "/foo2", "value": "1234"}, + {"dir": False, "key": "/foo3", "value": "1234"}, + {"dir": False, "key": "/foo4", "value": "1234"}, + { + "dir": True, + "key": "/foo5", + "leaves": [ + {"dir": False, "key": "/foo/bar1", "value": "56789"}, + {"dir": False, "key": "/foo/bar2", "value": "56789"}, + {"dir": False, "key": "/foo/bar3", "value": "56789"}, + { + "dir": True, + "key": "/foo/bar4", + "leaves": [ + {"dir": False, "key": "/foo/bar/baz1", "value": "123"}, + {"dir": False, "key": "/foo/bar/baz2", "value": "123"}, + ] + }, + ] + }, + ] + }, + "/foo5", + 21, + [], + ), + ( + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9, + }], + # test that a non-existing key is handled correctly + { + "dir": False, + "key": "/", + "value": "1234", + }, + "/missing", + 0, + [], + ), + ( + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9, + }], + # test etcd cycle handling + { + "dir": True, + "key": "/", + "leaves": [ + {"dir": False, "key": "/foo1", "value": "1234"}, + {"dir": False, "key": "/foo2", "value": "1234"}, + {"dir": False, "key": "/foo3", "value": "1234"}, + {"dir": False, "key": "/foo4", "value": "1234"}, + { + "dir": True, + "key": "/", + "leaves": [ + {"dir": False, "key": "/foo1", "value": "1"}, + ], + }, + ] + }, + "/", + 16, + [], + ), +]) +def test_etcd_key_size_check_calculates_correct_size(ansible_mounts, tree, root_path, expected_size, extra_words): + def execute_module(module_name, args, tmp=None, task_vars=None): + if module_name != "etcdkeysize": + return { + "changed": False, + } + + client = fake_etcd_client(tree) + size, limit_exceeded = check_etcd_key_size(client, root_path, args["size_limit_bytes"]) + + assert size == expected_size + return { + "size_limit_exceeded": limit_exceeded, + } + + task_vars = dict( + ansible_mounts=ansible_mounts, + openshift=dict( + master=dict(etcd_hosts=["localhost"]), + common=dict(config_base="/var/lib/origin") + ) + ) + + check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars) + assert not check.get("failed", False) + + +def test_etcdkeysize_module_failure(): + def execute_module(module_name, tmp=None, task_vars=None): + if module_name != "etcdkeysize": + return { + "changed": False, + } + + return { + "rc": 1, + "module_stderr": "failure", + } + + task_vars = dict( + ansible_mounts=[{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9, + }], + openshift=dict( + master=dict(etcd_hosts=["localhost"]), + common=dict(config_base="/var/lib/origin") + ) + ) + + check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars) + + assert check["failed"] + for word in "Failed to retrieve stats": + assert word in check["msg"] + + +def fake_execute_module(*args): + raise AssertionError('this function should not be called') diff --git a/roles/openshift_health_checker/test/memory_availability_test.py b/roles/openshift_health_checker/test/memory_availability_test.py new file mode 100644 index 000000000..e161a5b9e --- /dev/null +++ b/roles/openshift_health_checker/test/memory_availability_test.py @@ -0,0 +1,91 @@ +import pytest + +from openshift_checks.memory_availability import MemoryAvailability + + +@pytest.mark.parametrize('group_names,is_active', [ + (['masters'], True), + (['nodes'], True), + (['etcd'], True), + (['masters', 'nodes'], True), + (['masters', 'etcd'], True), + ([], False), + (['lb'], False), + (['nfs'], False), +]) +def test_is_active(group_names, is_active): + task_vars = dict( + group_names=group_names, + ) + assert MemoryAvailability.is_active(task_vars=task_vars) == is_active + + +@pytest.mark.parametrize('group_names,ansible_memtotal_mb', [ + ( + ['masters'], + 17200, + ), + ( + ['nodes'], + 8200, + ), + ( + ['etcd'], + 22200, + ), + ( + ['masters', 'nodes'], + 17000, + ), +]) +def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb): + task_vars = dict( + group_names=group_names, + ansible_memtotal_mb=ansible_memtotal_mb, + ) + + check = MemoryAvailability(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert not result.get('failed', False) + + +@pytest.mark.parametrize('group_names,ansible_memtotal_mb,extra_words', [ + ( + ['masters'], + 0, + ['0.0 GB'], + ), + ( + ['nodes'], + 100, + ['0.1 GB'], + ), + ( + ['etcd'], + -1, + ['0.0 GB'], + ), + ( + ['nodes', 'masters'], + # enough memory for a node, not enough for a master + 11000, + ['11.0 GB'], + ), +]) +def test_fails_with_insufficient_memory(group_names, ansible_memtotal_mb, extra_words): + task_vars = dict( + group_names=group_names, + ansible_memtotal_mb=ansible_memtotal_mb, + ) + + check = MemoryAvailability(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert result['failed'] + for word in 'below recommended'.split() + extra_words: + assert word in result['msg'] + + +def fake_execute_module(*args): + raise AssertionError('this function should not be called') |