From 7a643daa7ed629cd904cfb5fd5eec4260f0f1582 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Tue, 13 Jun 2017 19:09:19 +0200 Subject: Refactor DiskAvailability for arbitrary paths Prepare the check to support verifying multiple paths, not only /var. --- .../openshift_checks/disk_availability.py | 96 ++++++++++++++-------- .../test/disk_availability_test.py | 2 +- 2 files changed, 64 insertions(+), 34 deletions(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 962148cb8..dbcbfbc8e 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -1,4 +1,6 @@ # pylint: disable=missing-docstring +import os.path + from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var from openshift_checks.mixins import NotContainerizedMixin @@ -12,56 +14,84 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): # Values taken from the official installation documentation: # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements recommended_disk_space_bytes = { - "masters": 40 * 10**9, - "nodes": 15 * 10**9, - "etcd": 20 * 10**9, + '/var': { + 'masters': 40 * 10**9, + 'nodes': 15 * 10**9, + 'etcd': 20 * 10**9, + }, } @classmethod def is_active(cls, task_vars): """Skip hosts that do not have recommended disk space requirements.""" group_names = get_var(task_vars, "group_names", default=[]) - has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes)) + active_groups = set() + for recommendation in cls.recommended_disk_space_bytes.values(): + active_groups.update(recommendation.keys()) + has_disk_space_recommendation = bool(active_groups.intersection(group_names)) return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation def run(self, tmp, task_vars): group_names = get_var(task_vars, "group_names") ansible_mounts = get_var(task_vars, "ansible_mounts") - free_bytes = self.openshift_available_disk(ansible_mounts) - - recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) - configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9 - min_free_bytes = configured_min or recommended_min - - if free_bytes < min_free_bytes: - return { - 'failed': True, - 'msg': ( - 'Available disk space ({:.1f} GB) for the volume containing ' - '"/var" is below minimum recommended space ({:.1f} GB)' - ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9) + ansible_mounts = {mount['mount']: mount for mount in ansible_mounts} + + user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={}) + try: + # For backwards-compatibility, if openshift_check_min_host_disk_gb + # is a number, then it overrides the required config for '/var'. + number = float(user_config) + user_config = { + '/var': { + 'masters': number, + 'nodes': number, + 'etcd': number, + }, } + except TypeError: + # If it is not a number, then it should be a nested dict. + pass + + + for path, recommendation in self.recommended_disk_space_bytes.items(): + free_bytes = self.free_bytes(path, ansible_mounts) + recommended_bytes = max(recommendation.get(name, 0) for name in group_names) + + config = user_config.get(path, {}) + # NOTE: the user config is in GB, but we compare bytes, thus the + # conversion. + config_bytes = max(config.get(name, 0) for name in group_names) * 10**9 + recommended_bytes = config_bytes or recommended_bytes + + if free_bytes < recommended_bytes: + free_gb = float(free_bytes) / 10**9 + recommended_gb = float(recommended_bytes) / 10**9 + return { + 'failed': True, + 'msg': ( + 'Available disk space in "{}" ({:.1f} GB) ' + 'is below minimum recommended ({:.1f} GB)' + ).format(path, free_gb, recommended_gb) + } return {} @staticmethod - def openshift_available_disk(ansible_mounts): - """Determine the available disk space for an OpenShift installation. - - ansible_mounts should be a list of dicts like the 'setup' Ansible module - returns. - """ - # priority list in descending order - supported_mnt_paths = ["/var", "/"] - available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + def free_bytes(path, ansible_mounts): + """Return the size available in path based on ansible_mounts.""" + mount_point = path + # arbitry value to prevent an infinite loop, in the unlike case that '/' + # is not in ansible_mounts. + max_depth = 32 + while mount_point not in ansible_mounts and max_depth > 0: + mount_point = os.path.dirname(mount_point) + max_depth -= 1 try: - for path in supported_mnt_paths: - if path in available_mnts: - return available_mnts[path]["size_available"] + free_bytes = ansible_mounts[mount_point]['size_available'] except KeyError: - pass + known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none' + msg = 'Unable to determine disk availability for "{}". Known mount points: {}.' + raise OpenShiftCheckException(msg.format(path, known_mounts)) - paths = ''.join(sorted(available_mnts)) or 'none' - msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths) - raise OpenShiftCheckException(msg) + return free_bytes diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index b353fa610..de09c51da 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -38,7 +38,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): with pytest.raises(OpenShiftCheckException) as excinfo: check.run(tmp=None, task_vars=task_vars) - for word in 'determine available disk'.split() + extra_words: + for word in 'determine disk availability'.split() + extra_words: assert word in str(excinfo.value) -- cgit v1.2.3 From 75082afc3a2cc9fed1966479dc31946962101488 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Tue, 13 Jun 2017 19:11:43 +0200 Subject: Require at least 1GB in /usr/bin/local and tempdir During install, those paths are used and require some free space. --- .../openshift_checks/disk_availability.py | 14 ++++++++++++++ .../test/disk_availability_test.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index dbcbfbc8e..2c4642352 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -1,5 +1,6 @@ # pylint: disable=missing-docstring import os.path +import tempfile from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var from openshift_checks.mixins import NotContainerizedMixin @@ -19,6 +20,19 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): 'nodes': 15 * 10**9, 'etcd': 20 * 10**9, }, + # Used to copy client binaries into, + # see roles/openshift_cli/library/openshift_container_binary_sync.py. + '/usr/local/bin': { + 'masters': 1 * 10**9, + 'nodes': 1 * 10**9, + 'etcd': 1 * 10**9, + }, + # Used as temporary storage in several cases. + tempfile.gettempdir(): { + 'masters': 1 * 10**9, + 'nodes': 1 * 10**9, + 'etcd': 1 * 10**9, + }, } @classmethod diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index de09c51da..0c111a46d 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -81,7 +81,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): [{ # not enough space on / ... 'mount': '/', - 'size_available': 0, + 'size_available': 2 * 10**9, }, { # ... but enough on /var 'mount': '/var', -- cgit v1.2.3 From 022cf67ea6be52ee46bb52aa6e78b9690698dc2e Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Mon, 19 Jun 2017 17:35:34 +0200 Subject: Add suggestion to check disk space in any path --- .../openshift_health_checker/openshift_checks/disk_availability.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 2c4642352..638201495 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -66,7 +66,11 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): # If it is not a number, then it should be a nested dict. pass - + # TODO: as suggested in + # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021, + # maybe we could support checking disk availability in paths that are + # not part of the official recommendation but present in the user + # configuration. for path, recommendation in self.recommended_disk_space_bytes.items(): free_bytes = self.free_bytes(path, ansible_mounts) recommended_bytes = max(recommendation.get(name, 0) for name in group_names) -- cgit v1.2.3 From af4e0fec218c9e2c089854fa279b0537530bfd75 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Mon, 19 Jun 2017 17:40:16 +0200 Subject: Add module docstring --- roles/openshift_health_checker/openshift_checks/disk_availability.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 638201495..ac30d5fa5 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -1,4 +1,5 @@ -# pylint: disable=missing-docstring +"""Check that there is enough disk space in predefined paths.""" + import os.path import tempfile -- cgit v1.2.3 From 11762c063f52d46709db560479234b1d49e602b8 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Tue, 13 Jun 2017 20:18:04 +0200 Subject: Enable disk check on containerized installs According to the docs the disk requirements should be similar to non-containerized installs. https://docs.openshift.org/latest/install_config/install/rpm_vs_containerized.html#containerized-storage-requirements --- .../openshift_checks/disk_availability.py | 3 +-- .../test/disk_availability_test.py | 23 ++++++++++------------ 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index ac30d5fa5..e93e81efa 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -4,10 +4,9 @@ import os.path import tempfile from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var -from openshift_checks.mixins import NotContainerizedMixin -class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): +class DiskAvailability(OpenShiftCheck): """Check that recommended disk space is available before a first-time install.""" name = "disk_availability" diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index 0c111a46d..945b9eafc 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -3,22 +3,19 @@ import pytest from openshift_checks.disk_availability import DiskAvailability, OpenShiftCheckException -@pytest.mark.parametrize('group_names,is_containerized,is_active', [ - (['masters'], False, True), - # ensure check is skipped on containerized installs - (['masters'], True, False), - (['nodes'], False, True), - (['etcd'], False, True), - (['masters', 'nodes'], False, True), - (['masters', 'etcd'], False, True), - ([], False, False), - (['lb'], False, False), - (['nfs'], False, False), +@pytest.mark.parametrize('group_names,is_active', [ + (['masters'], True), + (['nodes'], True), + (['etcd'], True), + (['masters', 'nodes'], True), + (['masters', 'etcd'], True), + ([], False), + (['lb'], False), + (['nfs'], False), ]) -def test_is_active(group_names, is_containerized, is_active): +def test_is_active(group_names, is_active): task_vars = dict( group_names=group_names, - openshift=dict(common=dict(is_containerized=is_containerized)), ) assert DiskAvailability.is_active(task_vars=task_vars) == is_active -- cgit v1.2.3 From 11040f1b76981c22d62d17d1d22a3741e50a27fd Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Fri, 9 Jun 2017 17:04:19 +0200 Subject: Capture exceptions when resolving available checks Calling the action plugin (e.g. when running a playbook) with an incorrect check name was raising an unhandled exception, leading to poor output in Ansible (requiring a higher verbosity level to see what is going wrong). --- .../action_plugins/openshift_health_check.py | 5 ++--- roles/openshift_health_checker/test/action_plugin_test.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index a62e4331e..0390dc82e 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -38,14 +38,13 @@ class ActionModule(ActionBase): try: known_checks = self.load_known_checks() + args = self._task.args + resolved_checks = resolve_checks(args.get("checks", []), known_checks.values()) except OpenShiftCheckException as e: result["failed"] = True result["msg"] = str(e) return result - args = self._task.args - resolved_checks = resolve_checks(args.get("checks", []), known_checks.values()) - result["checks"] = check_results = {} user_disabled_checks = [ diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index 6ebf0ebb2..9383b233c 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -59,7 +59,7 @@ def failed(result, msg_has=None): if msg_has is not None: assert 'msg' in result for term in msg_has: - assert term in result['msg'] + assert term.lower() in result['msg'].lower() return result.get('failed', False) @@ -178,6 +178,16 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): assert not skipped(result) +def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch): + monkeypatch.setattr(plugin, 'load_known_checks', lambda: {}) + + result = plugin.run(tmp=None, task_vars=task_vars) + + assert failed(result, msg_has=['unknown', 'name']) + assert not changed(result) + assert not skipped(result) + + @pytest.mark.parametrize('names,all_checks,expected', [ ([], [], set()), ( -- cgit v1.2.3 From 055f7182b78feb29909539fca86e1ef55d1fc7db Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Tue, 27 Jun 2017 14:23:27 -0400 Subject: docker_storage check: make vgs return sane output fix bug 1464974 https://bugzilla.redhat.com/show_bug.cgi?id=1464974 Specify --units on vgs call. In my testing with lvm 2.0.2.171(2) on RHEL Atomic Host 7.4, this turned a response of "<4.07g" into "4.07g" which should resolve the issue. I haven't found what the "<" is for in the first place but I'm thinking this should at least be a safe change. --- roles/openshift_health_checker/openshift_checks/docker_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 2bd615457..8d0fbcc9c 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -143,7 +143,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "so the available storage in the VG cannot be determined.".format(pool) ) vg_name = match.groups()[0].replace("--", "-") - vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name + vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) -- cgit v1.2.3 From 4d3574957508c257e12d9ba8ec8de48ed9789eb9 Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Fri, 30 Jun 2017 10:19:50 -0400 Subject: docker_image_availability: fix containerized etcd fixes bug 1466622 - docker_image_availability check on etcd host failed for 'openshift_image_tag' is undefined --- .../openshift_checks/docker_image_availability.py | 3 ++- .../test/docker_image_availability_test.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'roles/openshift_health_checker') diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 26bf4c09b..bde81ad2c 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -94,7 +94,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): required = set() deployment_type = get_var(task_vars, "openshift_deployment_type") host_groups = get_var(task_vars, "group_names") - image_tag = get_var(task_vars, "openshift_image_tag") + # containerized etcd may not have openshift_image_tag, see bz 1466622 + image_tag = get_var(task_vars, "openshift_image_tag", default="latest") image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] if not image_info: return required diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 0a7c0f8d3..3b9e097fb 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -259,3 +259,17 @@ def test_required_images(deployment_type, is_containerized, groups, oreg_url, ex ) assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) + + +def test_containerized_etcd(): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=True, + ), + ), + openshift_deployment_type="origin", + group_names=['etcd'], + ) + expected = set(['registry.access.redhat.com/rhel7/etcd']) + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) -- cgit v1.2.3