From a202f1647a28e4d246d0341e37501df44cb1a914 Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Wed, 23 Aug 2017 17:44:19 -0400 Subject: openshift_checks: add retries in python --- .../openshift_checks/__init__.py | 17 +++++++++++++++++ .../openshift_checks/docker_image_availability.py | 6 ++---- .../openshift_health_checker/openshift_checks/mixins.py | 2 +- .../openshift_checks/package_availability.py | 2 +- .../openshift_checks/package_update.py | 2 +- .../openshift_checks/package_version.py | 2 +- 6 files changed, 23 insertions(+), 8 deletions(-) (limited to 'roles/openshift_health_checker/openshift_checks') diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 02ee1d0f9..987c955b6 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -4,6 +4,7 @@ Health checks for OpenShift clusters. import operator import os +import time from abc import ABCMeta, abstractmethod, abstractproperty from importlib import import_module @@ -57,6 +58,9 @@ class OpenShiftCheck(object): self._execute_module = execute_module self.task_vars = task_vars or {} self.tmp = tmp + # mainly for testing purposes; see execute_module_with_retries + self._module_retries = 3 + self._module_retry_interval = 5 # seconds # set to True when the check changes the host, for accurate total "changed" count self.changed = False @@ -115,6 +119,19 @@ class OpenShiftCheck(object): ) return self._execute_module(module_name, module_args, self.tmp, self.task_vars) + def execute_module_with_retries(self, module_name, module_args): + """Run execute_module and retry on failure.""" + result = {} + tries = 0 + while True: + res = self.execute_module(module_name, module_args) + if tries > self._module_retries or not res.get("failed"): + result.update(res) + return result + result["last_failed"] = res + tries += 1 + time.sleep(self._module_retry_interval) + def get_var(self, *keys, **kwargs): """Get deeply nested values from task_vars. diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 866c74d7c..458e51d87 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -171,10 +171,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): registries = [registry] for registry in registries: - args = { - "_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image) - } - result = self.execute_module("command", args) + args = {"_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image)} + result = self.execute_module_with_retries("command", args) if result.get("rc", 0) == 0 and not result.get("failed"): return True diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index e9bae60a3..24f1d938a 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -36,7 +36,7 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. This is about the same anyway: - result = self.execute_module( + result = self.execute_module_with_retries( self.get_var("ansible_pkg_mgr", default="yum"), {"name": self.dependencies, "state": "present"}, ) diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a86180b00..21355c2f0 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -26,7 +26,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args) + return self.execute_module_with_retries("check_yum_update", args) @staticmethod def master_packages(rpm_prefix): diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index 1e9aecbe0..8464e8a5e 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self): args = {"packages": []} - return self.execute_module("check_yum_update", args) + return self.execute_module_with_retries("check_yum_update", args) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 0b795b6c4..d4aec3ed8 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -76,7 +76,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args) + return self.execute_module_with_retries("aos_version", args) def get_required_ovs_version(self): """Return the correct Open vSwitch version(s) for the current OpenShift version.""" -- cgit v1.2.3