summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/docker_image_availability.py')
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py323
1 files changed, 177 insertions, 146 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index cce289b95..9c35f0f92 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -1,179 +1,210 @@
-# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, get_var
+"""Check that required Docker images are available."""
+from openshift_checks import OpenShiftCheck
+from openshift_checks.mixins import DockerHostMixin
-class DockerImageAvailability(OpenShiftCheck):
+
+NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"]
+DEPLOYMENT_IMAGE_INFO = {
+ "origin": {
+ "namespace": "openshift",
+ "name": "origin",
+ "registry_console_image": "cockpit/kubernetes",
+ },
+ "openshift-enterprise": {
+ "namespace": "openshift3",
+ "name": "ose",
+ "registry_console_image": "registry.access.redhat.com/openshift3/registry-console",
+ },
+}
+
+
+class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"""Check that required Docker images are available.
- This check attempts to ensure that required docker images are
- either present locally, or able to be pulled down from available
- registries defined in a host machine.
+ Determine docker images that an install would require and check that they
+ are either present in the host's docker index, or available for the host to pull
+ with known registries as defined in our inventory file (or defaults).
"""
name = "docker_image_availability"
tags = ["preflight"]
+ # we use python-docker-py to check local docker for images, and skopeo
+ # to look for images available remotely without waiting to pull them.
+ dependencies = ["python-docker-py", "skopeo"]
+ skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false docker://{registry}/{image}"
- skopeo_image = "openshift/openshift-ansible"
-
- # FIXME(juanvallejo): we should consider other possible values of
- # `deployment_type` (the key here). See
- # https://github.com/openshift/openshift-ansible/blob/8e26f8c/roles/openshift_repos/vars/main.yml#L7
- docker_image_base = {
- "origin": {
- "repo": "openshift",
- "image": "origin",
- },
- "openshift-enterprise": {
- "repo": "openshift3",
- "image": "ose",
- },
- }
-
- def run(self, tmp, task_vars):
- required_images = self.required_images(task_vars)
- missing_images = set(required_images) - set(self.local_images(required_images, task_vars))
+ def __init__(self, *args, **kwargs):
+ super(DockerImageAvailability, self).__init__(*args, **kwargs)
+ # record whether we could reach a registry or not (and remember results)
+ self.reachable_registries = {}
- # exit early if all images were found locally
- if not missing_images:
- return {"changed": False}
+ def is_active(self):
+ """Skip hosts with unsupported deployment types."""
+ deployment_type = self.get_var("openshift_deployment_type")
+ has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO
- msg, failed, changed = self.update_skopeo_image(task_vars)
+ return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type
- # exit early if Skopeo update fails
+ def run(self):
+ msg, failed = self.ensure_dependencies()
if failed:
return {
"failed": True,
- "changed": changed,
- "msg": "Failed to update Skopeo image ({img_name}). {msg}".format(img_name=self.skopeo_image, msg=msg),
- }
-
- registries = self.known_docker_registries(task_vars)
- available_images = self.available_images(missing_images, registries, task_vars)
- unavailable_images = set(missing_images) - set(available_images)
-
- if unavailable_images:
- return {
- "failed": True,
- "msg": (
- "One or more required images are not available: {}.\n"
- "Configured registries: {}"
- ).format(", ".join(sorted(unavailable_images)), ", ".join(registries)),
- "changed": changed,
+ "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg
}
- return {"changed": changed}
-
- def required_images(self, task_vars):
- deployment_type = get_var(task_vars, "deployment_type")
- # FIXME(juanvallejo): we should handle gracefully with a proper error
- # message when given an unexpected value for `deployment_type`.
- image_base_name = self.docker_image_base[deployment_type]
-
- openshift_release = get_var(task_vars, "openshift_release")
- # FIXME(juanvallejo): this variable is not required when the
- # installation is non-containerized. The example inventories have it
- # commented out. We should handle gracefully and with a proper error
- # message when this variable is required and not set.
- openshift_image_tag = get_var(task_vars, "openshift_image_tag")
+ required_images = self.required_images()
+ missing_images = set(required_images) - set(self.local_images(required_images))
- is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
-
- if is_containerized:
- images = set(self.containerized_docker_images(image_base_name, openshift_release))
- else:
- images = set(self.rpm_docker_images(image_base_name, openshift_release))
+ # exit early if all images were found locally
+ if not missing_images:
+ return {}
- # append images with qualified image tags to our list of required images.
- # these are images with a (v0.0.0.0) tag, rather than a standard release
- # format tag (v0.0). We want to check this set in both containerized and
- # non-containerized installations.
- images.update(
- self.qualified_docker_images(self.image_from_base_name(image_base_name), "v" + openshift_image_tag)
- )
+ registries = self.known_docker_registries()
+ if not registries:
+ return {"failed": True, "msg": "Unable to retrieve any docker registries."}
- return images
+ available_images = self.available_images(missing_images, registries)
+ unavailable_images = set(missing_images) - set(available_images)
- def local_images(self, images, task_vars):
+ if unavailable_images:
+ registries = [
+ reg if self.reachable_registries.get(reg, True) else reg + " (unreachable)"
+ for reg in registries
+ ]
+ msg = (
+ "One or more required Docker images are not available:\n {}\n"
+ "Configured registries: {}\n"
+ "Checked by: {}"
+ ).format(
+ ",\n ".join(sorted(unavailable_images)),
+ ", ".join(registries),
+ self.skopeo_img_check_command
+ )
+
+ return dict(failed=True, msg=msg)
+
+ return {}
+
+ def required_images(self):
+ """
+ Determine which images we expect to need for this host.
+ Returns: a set of required images like 'openshift/origin:v3.6'
+
+ The thorny issue of determining the image names from the variables is under consideration
+ via https://github.com/openshift/openshift-ansible/issues/4415
+
+ For now we operate as follows:
+ * For containerized components (master, node, ...) we look at the deployment type and
+ use openshift/origin or openshift3/ose as the base for those component images. The
+ version is openshift_image_tag as determined by the openshift_version role.
+ * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
+ it is defined; otherwise we again use the base that depends on the deployment type.
+ Registry is not included in constructed images. It may be in oreg_url or etcd image.
+ """
+ required = set()
+ deployment_type = self.get_var("openshift_deployment_type")
+ host_groups = self.get_var("group_names")
+ # containerized etcd may not have openshift_image_tag, see bz 1466622
+ image_tag = self.get_var("openshift_image_tag", default="latest")
+ image_info = DEPLOYMENT_IMAGE_INFO[deployment_type]
+ if not image_info:
+ return required
+
+ # template for images that run on top of OpenShift
+ image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
+ image_url = self.get_var("oreg_url", default="") or image_url
+ if 'nodes' in host_groups:
+ for suffix in NODE_IMAGE_SUFFIXES:
+ required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag))
+ # The registry-console is for some reason not prefixed with ose- like the other components.
+ # Nor is it versioned the same, so just look for latest.
+ # Also a completely different name is used for Origin.
+ required.add(image_info["registry_console_image"])
+
+ # images for containerized components
+ if self.get_var("openshift", "common", "is_containerized"):
+ components = set()
+ if 'nodes' in host_groups:
+ components.update(["node", "openvswitch"])
+ if 'masters' in host_groups: # name is "origin" or "ose"
+ components.add(image_info["name"])
+ for component in components:
+ required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
+ if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise
+ required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag
+
+ return required
+
+ def local_images(self, images):
"""Filter a list of images and return those available locally."""
+ registries = self.known_docker_registries()
+ found_images = []
+ for image in images:
+ # docker could have the image name as-is or prefixed with any registry
+ imglist = [image] + [reg + "/" + image for reg in registries]
+ if self.is_image_local(imglist):
+ found_images.append(image)
+ return found_images
+
+ def is_image_local(self, image):
+ """Check if image is already in local docker index."""
+ result = self.execute_module("docker_image_facts", {"name": image})
+ return bool(result.get("images")) and not result.get("failed")
+
+ def known_docker_registries(self):
+ """Build a list of docker registries available according to inventory vars."""
+ regs = list(self.get_var("openshift.docker.additional_registries", default=[]))
+
+ deployment_type = self.get_var("openshift_deployment_type")
+ if deployment_type == "origin" and "docker.io" not in regs:
+ regs.append("docker.io")
+ elif "enterprise" in deployment_type and "registry.access.redhat.com" not in regs:
+ regs.append("registry.access.redhat.com")
+
+ return regs
+
+ def available_images(self, images, default_registries):
+ """Search remotely for images. Returns: list of images found."""
return [
image for image in images
- if self.is_image_local(image, task_vars)
+ if self.is_available_skopeo_image(image, default_registries)
]
- def is_image_local(self, image, task_vars):
- result = self.module_executor("docker_image_facts", {"name": image}, task_vars)
- if result.get("failed", False):
- return False
-
- return bool(result.get("images", []))
-
- def known_docker_registries(self, task_vars):
- result = self.module_executor("docker_info", {}, task_vars)
+ def is_available_skopeo_image(self, image, default_registries):
+ """Use Skopeo to determine if required image exists in known registry(s)."""
+ registries = default_registries
- if result.get("failed", False):
- return []
+ # If image already includes a registry, only use that.
+ # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g.
+ # registry.access.redhat.com/rhel7 as if the registry were a namespace.
+ # It's not clear that there's any way to distinguish them, but fortunately
+ # the current set of images all look like [registry/]namespace/name[:version].
+ if image.count("/") > 1:
+ registry, image = image.split("/", 1)
+ registries = [registry]
- # FIXME(juanvallejo): wrong default type, result["info"] is expected to
- # contain a dictionary (see how we call `docker_info.get` below).
- docker_info = result.get("info", "")
- return [registry.get("Name", "") for registry in docker_info.get("Registries", {})]
-
- def available_images(self, images, registries, task_vars):
- """Inspect existing images using Skopeo and return all images successfully inspected."""
- return [
- image for image in images
- if self.is_image_available(image, registries, task_vars)
- ]
-
- def is_image_available(self, image, registries, task_vars):
for registry in registries:
- if self.is_available_skopeo_image(image, registry, task_vars):
+ if registry not in self.reachable_registries:
+ self.reachable_registries[registry] = self.connect_to_registry(registry)
+ if not self.reachable_registries[registry]:
+ continue
+
+ args = {"_raw_params": self.skopeo_img_check_command.format(registry=registry, image=image)}
+ result = self.execute_module_with_retries("command", args)
+ if result.get("rc", 0) == 0 and not result.get("failed"):
return True
+ if result.get("rc") == 124: # RC 124 == timed out; mark unreachable
+ self.reachable_registries[registry] = False
return False
- def is_available_skopeo_image(self, image, registry, task_vars):
- """Uses Skopeo to determine if required image exists in a given registry."""
-
- cmd_str = "skopeo inspect docker://{registry}/{image}".format(
- registry=registry,
- image=image,
- )
-
- args = {
- "name": "skopeo_inspect",
- "image": self.skopeo_image,
- "command": cmd_str,
- "detach": False,
- "cleanup": True,
- }
- result = self.module_executor("docker_container", args, task_vars)
- return result.get("failed", False)
-
- def containerized_docker_images(self, base_name, version):
- return [
- "{image}:{version}".format(image=self.image_from_base_name(base_name), version=version)
- ]
-
- @staticmethod
- def rpm_docker_images(base, version):
- return [
- "{image_repo}/registry-console:{version}".format(image_repo=base["repo"], version=version)
- ]
-
- @staticmethod
- def qualified_docker_images(image_name, version):
- return [
- "{}-{}:{}".format(image_name, component, version)
- for component in "haproxy-router docker-registry deployer pod".split()
- ]
-
- @staticmethod
- def image_from_base_name(base):
- return "".join([base["repo"], "/", base["image"]])
-
- # ensures that the skopeo docker image exists, and updates it
- # with latest if image was already present locally.
- def update_skopeo_image(self, task_vars):
- result = self.module_executor("docker_image", {"name": self.skopeo_image}, task_vars)
- return result.get("msg", ""), result.get("failed", False), result.get("changed", False)
+ def connect_to_registry(self, registry):
+ """Use ansible wait_for module to test connectivity from host to registry. Returns bool."""
+ # test a simple TCP connection
+ host, _, port = registry.partition(":")
+ port = port or 443
+ args = dict(host=host, port=port, state="started", timeout=30)
+ result = self.execute_module("wait_for", args)
+ return result.get("rc", 0) == 0 and not result.get("failed")