From f8d5693489cb95e6a7ccfcc5b33d99115f7da5d3 Mon Sep 17 00:00:00 2001
From: Devan Goodwin
Date: Thu, 15 Dec 2016 14:55:10 -0400
Subject: Wait for nodes to be ready before proceeding with upgrade.

Near the end of node upgrade, we now wait for the node to report Ready
before marking it schedulable again. This should help eliminate delays
when pods need to relocate as the next node in line is evacuated.

This happens near the end of the process; the only remaining task is to
mark the node schedulable again, so a failure here is easy for admins to
detect and recover from.
---
 .../common/openshift-cluster/upgrades/upgrade_nodes.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml')

diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index cefc7d12b..b3ac34d90 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -87,6 +87,19 @@
   - name: Restart rpm node service
     service: name="{{ openshift.common.service_type }}-node" state=restarted
     when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool
+
+  - name: Wait for node to be ready
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.common.hostname | lower }} --no-headers
+    register: node_output
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+    # Require a status column before testing it, so empty output is retried rather than raising a templating error.
+    until: node_output.stdout is defined and (node_output.stdout.split() | length) > 1 and node_output.stdout.split()[1].startswith('Ready')
+    # Give the node two minutes (24 x 5s) to come back online. We pre-pull images now, so containerized services should restart quickly as well.
+    retries: 24
+    delay: 5
+
   - name: Set node schedulability
     command: >
       {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true
--
cgit v1.2.3