From 9ba4195cb4c70705c4cec2543617738b8e114148 Mon Sep 17 00:00:00 2001 From: Steve Kuznetsov Date: Fri, 9 Jun 2017 07:02:35 -0700 Subject: Verify the rollout status of the hosted router and registry When deploying the hosted router and registry components, we need to ensure that they correctly roll out. The previous checks were weak in that they either simply waited for a set amount of time and/or did one replica check. They would fail if the router or registry took longer to deploy or if there were un-ready or failing replicas. The `oc rollout` command group contains the `status` endpoint for internalizing all of the logic for determining when a rollout has succeeded or failed, so simply using this client call will ensure that the router and registry correctly deploy. Signed-off-by: Steve Kuznetsov --- roles/openshift_hosted/tasks/registry/registry.yml | 6 ++++++ roles/openshift_hosted/tasks/router/router.yml | 20 ++++++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 751489958..7945e16ca 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -124,6 +124,12 @@ edits: "{{ openshift_hosted_registry_edits }}" force: "{{ True|bool in openshift_hosted_registry_force }}" +- name: Ensure OpenShift registry correctly rolls out + command: | + oc rollout status deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig + - include: storage/glusterfs.yml when: - openshift.hosted.registry.storage.kind | default(none) == 'glusterfs' or openshift.hosted.registry.storage.glusterfs.swap diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 192afc87a..3382abd7b 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -55,7 +55,7 @@ state: present with_items: "{{ openshift_hosted_routers }}" -- name: Grant the router serivce account(s) access to the appropriate scc +- name: Grant the router service account(s) access to the appropriate scc oc_adm_policy_user: user: "system:serviceaccount:{{ item.namespace }}:{{ item.serviceaccount }}" namespace: "{{ item.namespace }}" @@ -89,18 +89,10 @@ ports: "{{ item.ports }}" stats_port: "{{ item.stats_port }}" with_items: "{{ openshift_hosted_routers }}" - register: routerout -# This should probably move to module -- name: wait for deploy - pause: - seconds: 30 - when: routerout.changed - -- name: Ensure router replica count matches desired - oc_scale: - kind: dc - name: "{{ item.name | default('router') }}" - namespace: "{{ item.namespace | default('default') }}" - replicas: "{{ item.replicas }}" +- name: Ensure OpenShift router correctly rolls out + command: | + oc rollout status deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace | default('default') }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig with_items: "{{ openshift_hosted_routers }}" -- cgit v1.2.3 From acb78312d18c27f607ad71898fcc6bacab4b5cb6 Mon Sep 17 00:00:00 2001 From: Steve Kuznetsov Date: Mon, 12 Jun 2017 10:36:41 -0700 Subject: Make rollout status check best-effort, add poll We cannot rely on the `watch.Until` call in the `rollout status` subcommand for the time being, so we need to ignore the result of this call. This will make the rollout status check best-effort, so we need to follow it with a poll for the actual status of the rollout, which we can extract from the `openshift.io/deployment.phase` annotation on the ReplicationControllers. This annotation can have only three values -- `Running`, `Complete` and `Failed`. If we poll on this attribute until we stop seeing `Running`, we can then inspect the last result for `Failed`; if it's present, we have failed the deployment. Signed-off-by: Steve Kuznetsov --- roles/openshift_hosted/tasks/registry/registry.yml | 25 ++++++++++++++++++- roles/openshift_hosted/tasks/router/router.yml | 29 +++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 7945e16ca..d895e9a68 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -124,11 +124,34 @@ edits: "{{ openshift_hosted_registry_edits }}" force: "{{ True|bool in openshift_hosted_registry_force }}" -- name: Ensure OpenShift registry correctly rolls out +- name: Ensure OpenShift registry correctly rolls out (best-effort today) command: | oc rollout status deploymentconfig {{ openshift_hosted_registry_name }} \ --namespace {{ openshift_hosted_registry_namespace }} \ --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + failed_when: false + +- name: Determine the latest version of the OpenShift registry deployment + command: | + oc get deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_registry_latest_version + +- name: Sanity-check that the OpenShift registry rolled out correctly + command: | + oc get replicationcontroller {{ openshift_hosted_registry_name }}-{{ openshift_hosted_registry_latest_version.stdout }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_registry_rc_phase + until: "'Running' not in openshift_hosted_registry_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_registry_rc_phase.stdout" - include: storage/glusterfs.yml when: diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 3382abd7b..160ae2f5e 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -90,9 +90,36 @@ stats_port: "{{ item.stats_port }}" with_items: "{{ openshift_hosted_routers }}" -- name: Ensure OpenShift router correctly rolls out +- name: Ensure OpenShift router correctly rolls out (best-effort today) command: | oc rollout status deploymentconfig {{ item.name }} \ --namespace {{ item.namespace | default('default') }} \ --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 with_items: "{{ openshift_hosted_routers }}" + failed_when: false + +- name: Determine the latest version of the OpenShift router deployment + command: | + oc get deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_routers_latest_version + with_items: "{{ openshift_hosted_routers }}" + +- name: Poll for OpenShift router deployment success + command: | + oc get replicationcontroller {{ item.0.name }}-{{ item.1.stdout }} \ + --namespace {{ item.0.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_router_rc_phase + until: "'Running' not in openshift_hosted_router_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_router_rc_phase.stdout" + with_together: + - "{{ openshift_hosted_routers }}" + - "{{ openshift_hosted_routers_latest_version.results }}" -- cgit v1.2.3