From 5c24cf417b08e0b427435d1bd5d27a4b03467092 Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Thu, 24 Nov 2016 10:00:29 -0400 Subject: Cleanup ovs file and restart docker on every upgrade. In 3.3 one of our services lays down a systemd drop-in for configuring Docker networking to use lbr0. In 3.4, this has been changed but the file must be cleaned up manually by us. However, after removing the file docker requires a restart. This had big implications particularly in containerized environments where upgrade is a very fragile series of upgrading and service restarts. To avoid double docker restarts, and thus double service restarts in containerized environments, this change does the following: - Skip restart during docker upgrade, if it is required. We will restart on our own later. - Skip containerized service restarts when we upgrade the services themselves. - Clean shutdown of all containerized services. - Restart Docker. (always, previously this only happened if it needed an upgrade) - Ensure all containerized services are restarted. - Restart rpm node services. (always) - Mark node schedulable again. At the end of this process, docker0 should be back on the system. --- .../upgrades/containerized_node_upgrade.yml | 17 ++++++++----- .../openshift-cluster/upgrades/docker/restart.yml | 29 ++++++++++++++++++++++ .../openshift-cluster/upgrades/docker/upgrade.yml | 29 ++-------------------- .../openshift-cluster/upgrades/rpm_upgrade.yml | 4 --- .../upgrades/upgrade_control_plane.yml | 1 + .../openshift-cluster/upgrades/upgrade_nodes.yml | 27 +++++++++++++++++++- 6 files changed, 69 insertions(+), 38 deletions(-) create mode 100644 playbooks/common/openshift-cluster/upgrades/docker/restart.yml (limited to 'playbooks') diff --git a/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml index 439df5ffd..9f7961614 100644 --- a/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml @@ -1,9 +1,14 @@ +--- +# This is a hack to allow us to use systemd_units.yml, but skip the handlers which +# restart services. We will unconditionally restart all containerized services +# because we have to unconditionally restart Docker: +- set_fact: + skip_node_svc_handlers: True + - name: Update systemd units include: ../../../../roles/openshift_node/tasks/systemd_units.yml openshift_version={{ openshift_image_tag }} -- name: Verifying the correct version was configured - shell: grep {{ verify_upgrade_version }} {{ item }} - with_items: - - /etc/sysconfig/openvswitch - - /etc/sysconfig/{{ openshift.common.service_type }}* - when: verify_upgrade_version is defined +# This is a no-op because of skip_node_svc_handlers, but lets us trigger it before end of +# play when the node has already been marked schedulable again. (this would look strange +# in logs otherwise) +- meta: flush_handlers diff --git a/playbooks/common/openshift-cluster/upgrades/docker/restart.yml b/playbooks/common/openshift-cluster/upgrades/docker/restart.yml new file mode 100644 index 000000000..d800b289b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/docker/restart.yml @@ -0,0 +1,29 @@ +--- +- name: Restart docker + service: name=docker state=restarted + +- name: Update docker facts + openshift_facts: + role: docker + +- name: Restart containerized services + service: name={{ item }} state=started + with_items: + - etcd_container + - openvswitch + - "{{ openshift.common.service_type }}-master" + - "{{ openshift.common.service_type }}-master-api" + - "{{ openshift.common.service_type }}-master-controllers" + - "{{ openshift.common.service_type }}-node" + failed_when: false + when: openshift.common.is_containerized | bool + +- name: Wait for master API to come back online + become: no + local_action: + module: wait_for + host="{{ inventory_hostname }}" + state=started + delay=10 + port="{{ openshift.master.api_port }}" + when: inventory_hostname in groups.oo_masters_to_config diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml index 5d753447c..44ddf97ad 100644 --- a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml @@ -37,30 +37,5 @@ - name: Upgrade Docker package: name=docker{{ '-' + docker_version }} state=present -- service: name=docker state=started - -- name: Update docker facts - openshift_facts: - role: docker - -- name: Restart containerized services - service: name={{ item }} state=started - with_items: - - etcd_container - - openvswitch - - "{{ openshift.common.service_type }}-master" - - "{{ openshift.common.service_type }}-master-api" - - "{{ openshift.common.service_type }}-master-controllers" - - "{{ openshift.common.service_type }}-node" - failed_when: false - when: openshift.common.is_containerized | bool - -- name: Wait for master API to come back online - become: no - local_action: - module: wait_for - host="{{ inventory_hostname }}" - state=started - delay=10 - port="{{ openshift.master.api_port }}" - when: inventory_hostname in groups.oo_masters_to_config +- include: restart.yml + when: not skip_docker_restart | default(False) | bool diff --git a/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml index d7d1fe548..df2b664d4 100644 --- a/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml @@ -6,7 +6,3 @@ - name: Ensure python-yaml present for config upgrade package: name=PyYAML state=present when: not openshift.common.is_atomic | bool - -- name: Restart node service - service: name="{{ openshift.common.service_type }}-node" state=restarted - when: component == "node" diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 57c25aa41..4d714ef4e 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -29,6 +29,7 @@ - name: Backup etcd include: ./etcd/backup.yml + when: openshift_upgrade_skip_etcd_backup | default(false) | bool - name: Upgrade master packages hosts: oo_masters_to_config diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 53d670196..bb7955c45 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -44,8 +44,13 @@ {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --evacuate --force delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_upgrade + tasks: + - include: docker/upgrade.yml + vars: + # We will restart Docker ourselves after everything is ready: + skip_docker_restart: True when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool - include: "{{ node_config_hook }}" @@ -57,11 +62,31 @@ openshift_version: "{{ openshift_pkg_version | default('') }}" when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool + - name: Remove obsolete docker-sdn-ovs.conf + file: path=/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf state=absent + when: (deployment_type == 'openshift-enterprise' and openshift_release | version_compare('3.4', '>=')) or (deployment_type == 'origin' and openshift_release | version_compare('1.4', '>=')) + - include: containerized_node_upgrade.yml when: inventory_hostname in groups.oo_nodes_to_upgrade and openshift.common.is_containerized | bool - - meta: flush_handlers + - name: Ensure containerized services stopped before Docker restart + service: name={{ item }} state=stopped + with_items: + - etcd_container + - openvswitch + - "{{ openshift.common.service_type }}-master" + - "{{ openshift.common.service_type }}-master-api" + - "{{ openshift.common.service_type }}-master-controllers" + - "{{ openshift.common.service_type }}-node" + failed_when: false + when: openshift.common.is_containerized | bool + + # Mandatory Docker restart, ensure all containerized services are running: + - include: docker/restart.yml + - name: Restart rpm node service + service: name="{{ openshift.common.service_type }}-node" state=restarted + when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool - name: Set node schedulability command: > {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true -- cgit v1.2.3