From f8d5693489cb95e6a7ccfcc5b33d99115f7da5d3 Mon Sep 17 00:00:00 2001
From: Devan Goodwin <dgoodwin@redhat.com>
Date: Thu, 15 Dec 2016 14:55:10 -0400
Subject: Wait for nodes to be ready before proceeding with upgrade.

Near the end of node upgrade, we now wait for the node to report Ready
before marking it schedulable again. This should help eliminate delays
when pods need to relocate as the next node in line is evacuated.

The wait happens near the end of the process; the only remaining task is to
mark the node schedulable again, so a timeout is easy for admins to detect and
recover from.
---
 .../common/openshift-cluster/upgrades/upgrade_nodes.yml     | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml')

diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index cefc7d12b..b3ac34d90 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -87,6 +87,19 @@
   - name: Restart rpm node service
     service: name="{{ openshift.common.service_type }}-node" state=restarted
     when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool
+
+  - name: Wait for node to be ready
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} --no-headers
+    register: node_output
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+    until: node_output.stdout.split()[1].startswith('Ready')
+    # Poll the node's second output field for up to two minutes (24 tries x 5s).
+    # Note that we pre-pull images now, so containerized services restart quickly too.
+    retries: 24
+    delay: 5
+
   - name: Set node schedulability
     command: >
       {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true
-- 
cgit v1.2.3