From 82f4e4eaeaaf3059013e9ea23d87dcf89fd8455e Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Tue, 21 Jun 2016 15:01:01 -0300 Subject: Refactor 3.2 upgrade to avoid killing nodes without evac. We now handle the two pieces of upgrade that require a node evac in the same play. (docker, and node itself) --- .../v3_1_to_v3_2/containerized_node_upgrade.yml | 11 ++++ .../v3_1_to_v3_2/containerized_upgrade.yml | 11 ---- .../upgrades/v3_1_to_v3_2/docker_upgrade.yml | 14 ----- .../upgrades/v3_1_to_v3_2/node_upgrade.yml | 24 -------- .../upgrades/v3_1_to_v3_2/pre.yml | 2 +- .../upgrades/v3_1_to_v3_2/upgrade.yml | 72 +++++++++++----------- 6 files changed, 49 insertions(+), 85 deletions(-) create mode 100644 playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml delete mode 100644 playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml delete mode 100644 playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml delete mode 100644 playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml (limited to 'playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2') diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml new file mode 100644 index 000000000..319758a06 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml @@ -0,0 +1,11 @@ +- include_vars: ../../../../../roles/openshift_node/vars/main.yml + +- name: Update systemd units + include: ../../../../../roles/openshift_node/tasks/systemd_units.yml openshift_version=v{{ g_new_version }} + +- name: Verifying the correct version was configured + shell: grep {{ verify_upgrade_version }} {{ item }} + with_items: + - /etc/sysconfig/openvswitch + - /etc/sysconfig/{{ openshift.common.service_type }}* + when: verify_upgrade_version is defined diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml deleted file mode 100644 index 319758a06..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml +++ /dev/null @@ -1,11 +0,0 @@ -- include_vars: ../../../../../roles/openshift_node/vars/main.yml - -- name: Update systemd units - include: ../../../../../roles/openshift_node/tasks/systemd_units.yml openshift_version=v{{ g_new_version }} - -- name: Verifying the correct version was configured - shell: grep {{ verify_upgrade_version }} {{ item }} - with_items: - - /etc/sysconfig/openvswitch - - /etc/sysconfig/{{ openshift.common.service_type }}* - when: verify_upgrade_version is defined diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml deleted file mode 100644 index c7b18f51b..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml +++ /dev/null @@ -1,14 +0,0 @@ -- name: Check if Docker is installed - command: rpm -q docker - register: pkg_check - failed_when: pkg_check.rc > 1 - changed_when: no - -- name: Upgrade Docker - command: "{{ ansible_pkg_mgr}} update -y docker" - when: pkg_check.rc == 0 and g_docker_version.curr_version | version_compare('1.9','<') - register: docker_upgrade - -- name: Restart Docker - command: systemctl restart docker - when: docker_upgrade | changed diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml deleted file mode 100644 index a911f12be..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml +++ /dev/null @@ -1,24 +0,0 @@ -- name: Prepare for Node evacuation - command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=false - delegate_to: "{{ groups.oo_first_master.0 }}" - -- name: Evacuate Node for Kubelet upgrade - command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --evacuate --force - delegate_to: "{{ groups.oo_first_master.0 }}" - -- include: rpm_upgrade.yml - vars: - component: "node" - openshift_version: "{{ openshift_pkg_version | default('') }}" - when: not openshift.common.is_containerized | bool - -- include: containerized_upgrade.yml - when: openshift.common.is_containerized | bool - -- name: Set node schedulability - command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=true - delegate_to: "{{ groups.oo_first_master.0 }}" - when: openshift.node.schedulable | bool diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml index ec07f0a60..55ede13f0 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml @@ -3,7 +3,7 @@ # Evaluate host groups and gather facts ############################################################################### -- include: ../../common/openshift-cluster/initialize_facts.yml +- include: ../../initialize_facts.yml - name: Update repos hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml index 66f6f8e71..8eeb652a7 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml @@ -3,19 +3,6 @@ # The restart playbook should be run after this playbook completes. ############################################################################### -- name: Upgrade docker - hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config - roles: - - openshift_facts - tasks: - - include: docker_upgrade.yml - when: not openshift.common.is_atomic | bool - - name: Set post docker install facts - openshift_facts: - role: "{{ item.role }}" - with_items: - - role: docker - ############################################################################### # Upgrade Masters ############################################################################### @@ -68,36 +55,51 @@ ############################################################################### # Upgrade Nodes ############################################################################### -- name: Upgrade nodes - hosts: oo_nodes_to_config + +# Here we handle all tasks that might require a node evac. (upgrading docker, and the node service) +- name: Perform upgrades that may require node evacuation + hosts: oo_masters_to_config:oo_etcd_to_config:oo_nodes_to_config serial: 1 + any_errors_fatal: true roles: - openshift_facts handlers: - include: ../../../../../roles/openshift_node/handlers/main.yml tasks: - - include: node_upgrade.yml + # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node + # or docker actually needs an upgrade before proceeding. + - name: Mark unschedulable if host is a node + command: > + {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=false + delegate_to: "{{ groups.oo_first_master.0 }}" + when: inventory_hostname in groups.oo_nodes_to_config - - set_fact: - node_update_complete: True + - name: Evacuate Node for Kubelet upgrade + command: > + {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --evacuate --force + delegate_to: "{{ groups.oo_first_master.0 }}" + when: inventory_hostname in groups.oo_nodes_to_config + + - include: ../docker/upgrade_check.yml + + - include: ../docker/upgrade.yml + when: docker_upgrade is defined and docker_upgrade | bool + + - include: rpm_upgrade.yml + vars: + component: "node" + openshift_version: "{{ openshift_pkg_version | default('') }}" + when: inventory_hostname in groups.oo_nodes_to_config and not openshift.common.is_containerized | bool + + - include: containerized_node_upgrade.yml + when: inventory_hostname in groups.oo_nodes_to_config and openshift.common.is_containerized | bool + + - name: Set node schedulability + command: > + {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=true + delegate_to: "{{ groups.oo_first_master.0 }}" + when: inventory_hostname in groups.oo_nodes_to_config and openshift.node.schedulable | bool -############################################################################## -# Gate on nodes update -############################################################################## -- name: Gate on nodes update - hosts: localhost - connection: local - become: no - tasks: - - set_fact: - node_update_completed: "{{ hostvars - | oo_select_keys(groups.oo_nodes_to_config) - | oo_collect('inventory_hostname', {'node_update_complete': true}) }}" - - set_fact: - node_update_failed: "{{ groups.oo_nodes_to_config | difference(node_update_completed) }}" - - fail: - msg: "Upgrade cannot continue. The following nodes did not finish updating: {{ node_update_failed | join(',') }}" - when: node_update_failed | length > 0 ############################################################################### # Reconcile Cluster Roles, Cluster Role Bindings and Security Context Constraints -- cgit v1.2.3