3 files changed, 65 insertions, 6 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml
index 2bbcbe1f8..9771d5445 100644
--- a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml
+++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml
@@ -26,21 +26,26 @@
     registry_url: "{{ openshift.master.registry_url }}"
     openshift_hosted_templates_import_command: replace
   pre_tasks:
+
+  # TODO: remove temp_skip_router_registry_upgrade. It is a short-term hack that lets ops run
+  # this control plane upgrade without the router/registry upgrade (not yet synced with their
+  # process). Test it first in each `when`: a skipped task's registered result has no `rc`.
   - name: Collect all routers
     command: >
       {{ oc_cmd }} get pods --all-namespaces -l 'router' -o json
     register: all_routers
     failed_when: false
     changed_when: false
+    when: temp_skip_router_registry_upgrade is not defined
 
   - set_fact: haproxy_routers="{{ (all_routers.stdout | from_json)['items'] | oo_pods_match_component(openshift_deployment_type, 'haproxy-router') | oo_select_keys_from_list(['metadata']) }}"
-    when: all_routers.rc == 0
+    when: temp_skip_router_registry_upgrade is not defined and all_routers.rc == 0
 
   - set_fact: haproxy_routers=[]
-    when: all_routers.rc != 0
+    when: temp_skip_router_registry_upgrade is not defined and all_routers.rc != 0
 
   - name: Update router image to current version
-    when: all_routers.rc == 0
+    when: temp_skip_router_registry_upgrade is not defined and all_routers.rc == 0
     command: >
       {{ oc_cmd }} patch dc/{{ item['labels']['deploymentconfig'] }} -n {{ item['namespace'] }} -p
       '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}","livenessProbe":{"tcpSocket":null,"httpGet":{"path": "/healthz", "port": 1936, "host": "localhost", "scheme": "HTTP"},"initialDelaySeconds":10,"timeoutSeconds":1}}]}}}}'
@@ -53,9 +58,10 @@
     register: _default_registry
     failed_when: false
     changed_when: false
+    when: temp_skip_router_registry_upgrade is not defined
 
   - name: Update registry image to current version
-    when: _default_registry.rc == 0
+    when: temp_skip_router_registry_upgrade is not defined and _default_registry.rc == 0  # skip test first: no rc if the registry check was skipped
     command: >
       {{ oc_cmd }} patch dc/docker-registry -n default -p
       '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}'
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
index 9cad931af..db2c27919 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
@@ -229,3 +229,56 @@
   tasks:
   - include: docker/upgrade.yml
     when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool
+
+- name: Drain and upgrade master nodes
+  hosts: oo_masters_to_config:&oo_nodes_to_upgrade  # ":&" = hosts present in both groups
+  # openshift_upgrade_nodes_serial must be set with -e on invocation, as it is not a per-host
+  # inventory var and is evaluated early. Values such as "20%" can also be used; default is 1.
+  serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
+  any_errors_fatal: true
+
+  pre_tasks:
+  # TODO: To better handle retrying failed upgrades, it would be nice to check if the node
+  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
+  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
+  - name: Determine if node is currently scheduleable
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} -o json
+    register: node_output
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    changed_when: false
+
+  - set_fact:
+      was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"  # read again in post_tasks
+
+  - name: Mark node unschedulable
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    # NOTE: This command can hit a transient "object has been modified" error, so retry it
+    # (retries: 3, delay: 1 second) for a more reliable upgrade.
+    register: node_unsched
+    until: node_unsched.rc == 0
+    retries: 3
+    delay: 1
+
+  - name: Drain Node for Kubelet upgrade
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+
+  roles:
+  - openshift_facts
+  - docker
+  - openshift_node_upgrade
+
+  post_tasks:
+  - name: Set node schedulability
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: was_schedulable | bool  # only re-enable nodes that were schedulable before the drain
+    register: node_sched
+    until: node_sched.rc == 0
+    retries: 3
+    delay: 1
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index c0746a9e6..59188c570 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -1,6 +1,6 @@
 ---
 - name: Drain and upgrade nodes
-  hosts: oo_nodes_to_upgrade
+  hosts: oo_nodes_to_upgrade:!oo_masters_to_config
   # This var must be set with -e on invocation, as it is not a per-host inventory var
   # and is evaluated early. Values such as "20%" can also be used.
   serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
@@ -20,7 +20,7 @@
   - set_fact:
       was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
 
-  - name: Mark unschedulable if host is a node
+  - name: Mark node unschedulable
     command: >
       {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false
     delegate_to: "{{ groups.oo_first_master.0 }}"