summaryrefslogtreecommitdiffstats
path: root/playbooks/common
diff options
context:
space:
mode:
author: Devan Goodwin <dgoodwin@redhat.com>, 2016-06-21 15:01:01 -0300
committer: Devan Goodwin <dgoodwin@redhat.com>, 2016-06-23 10:42:47 -0300
commit: 82f4e4eaeaaf3059013e9ea23d87dcf89fd8455e (patch)
tree: b90f8a8db3b11ea10b317c88e8722569a12112e6 /playbooks/common
parent: 5c7e3e9d3a36062a6bf0d79da0c95e1f2e17b9a0 (diff)
download: openshift-82f4e4eaeaaf3059013e9ea23d87dcf89fd8455e.tar.gz
openshift-82f4e4eaeaaf3059013e9ea23d87dcf89fd8455e.tar.bz2
openshift-82f4e4eaeaaf3059013e9ea23d87dcf89fd8455e.tar.xz
openshift-82f4e4eaeaaf3059013e9ea23d87dcf89fd8455e.zip
Refactor 3.2 upgrade to avoid killing nodes without evac.
We now handle the two pieces of the upgrade that require a node evacuation (Docker and the node itself) in the same play.
Diffstat (limited to 'playbooks/common')
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml 52
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml 37
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh 23
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml (renamed from playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml) 0
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml 14
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml 24
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml 2
-rw-r--r-- playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml 72
8 files changed, 150 insertions, 74 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml
new file mode 100644
index 000000000..78b123881
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml
@@ -0,0 +1,52 @@
+---
+# We need docker service up to remove all the images, but these services will keep
+# trying to re-start and thus re-pull the images we're trying to delete.
+- name: stop containerized services
+ service: name={{ item }} state=stopped
+ with_items:
+ - "{{ openshift.common.service_type }}-master"
+ - "{{ openshift.common.service_type }}-master-api"
+ - "{{ openshift.common.service_type }}-master-controllers"
+ - "{{ openshift.common.service_type }}-node"
+ - etcd_container
+ - openvswitch
+ failed_when: false
+ when: docker_upgrade is defined and docker_upgrade | bool and openshift.common.is_containerized | bool
+
+- name: remove all containers and images
+ script: nuke_images.sh docker
+ register: nuke_images_result
+ when: docker_upgrade is defined and docker_upgrade | bool
+
+# todo: should we use the docker role to actually do the upgrade?
+- name: upgrade to specified docker version
+ action: "{{ ansible_pkg_mgr }} name=docker{{ '-' + docker_version }} state=present"
+ register: docker_upgrade_result
+ when: docker_upgrade is defined and docker_upgrade | bool and docker_version is defined
+
+- name: upgrade to latest docker version
+ action: "{{ ansible_pkg_mgr }} name=docker state=latest"
+ register: docker_upgrade_result
+ when: docker_upgrade is defined and docker_upgrade | bool and docker_version is not defined
+
+- name: restart containerized services
+ service: name={{ item }} state=started
+ with_items:
+ - etcd_container
+ - openvswitch
+ - "{{ openshift.common.service_type }}-master"
+ - "{{ openshift.common.service_type }}-master-api"
+ - "{{ openshift.common.service_type }}-master-controllers"
+ - "{{ openshift.common.service_type }}-node"
+ failed_when: false
+ when: docker_upgrade is defined and docker_upgrade | bool and openshift.common.is_containerized | bool
+
+- name: wait for master api to come back online
+ become: no
+ local_action:
+ module: wait_for
+ host="{{ inventory_hostname }}"
+ state=started
+ delay=10
+ port="{{ openshift.master.api_port }}"
+ when: docker_upgrade is defined and docker_upgrade | bool and inventory_hostname in groups.oo_masters_to_config
diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml
new file mode 100644
index 000000000..928913ef3
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml
@@ -0,0 +1,37 @@
+---
+- name: Determine available Docker version
+ script: ../../../../common/openshift-cluster/upgrades/files/rpm_versions.sh docker
+ register: g_docker_version_result
+
+- name: Check if Docker is installed
+ command: rpm -q docker
+ register: pkg_check
+ failed_when: pkg_check.rc > 1
+ changed_when: no
+
+- name: Get current version of Docker
+ command: "{{ repoquery_cmd }} --installed --qf '%{version}' docker"
+ register: curr_docker_version
+ changed_when: false
+
+- name: Get latest available version of Docker
+ command: >
+ {{ repoquery_cmd }} --qf '%{version}' "docker"
+ register: avail_docker_version
+ failed_when: false
+ changed_when: false
+
+- fail:
+ msg: This playbook requires access to Docker 1.10 or later
+ # Disable the 1.10 requirement if the user set a specific Docker version
+ when: avail_docker_version.stdout | version_compare('1.10','<') and docker_version is not defined
+
+- name: Flag for upgrade if Docker version does not equal latest
+ set_fact:
+ docker_upgrade: true
+ when: docker_version is not defined and pkg_check.rc == 0 and curr_docker_version.stdout | version_compare(avail_docker_version.stdout,'<')
+
+- name: Flag for upgrade if Docker version does not equal requested version
+ set_fact:
+ docker_upgrade: true
+ when: docker_version is defined and pkg_check.rc == 0 and curr_docker_version.stdout | version_compare(docker_version,'<')
diff --git a/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh b/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh
new file mode 100644
index 000000000..9a5ee2276
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Purge every container and image from the local Docker daemon (arguments are ignored).
+
+# Stop any running containers (ID lists are left unquoted so each ID is its own argument)
+running_containers=$(docker ps -q)
+if [ -n "$running_containers" ]
+then
+    docker stop $running_containers
+fi
+
+# Delete all containers, together with their associated volumes
+all_containers=$(docker ps -a -q)
+if [ -n "$all_containers" ]
+then
+    docker rm -f -v $all_containers
+fi
+
+# Delete all images (forcefully) by ID; sort -u because an ID repeats once per tag
+all_images=$(docker images -q | sort -u)
+if [ -n "$all_images" ]
+then
+    docker rmi -f $all_images || echo "Some images could not be removed." >&2
+fi
diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml
index 319758a06..319758a06 100644
--- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/containerized_node_upgrade.yml
diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml
deleted file mode 100644
index c7b18f51b..000000000
--- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-- name: Check if Docker is installed
- command: rpm -q docker
- register: pkg_check
- failed_when: pkg_check.rc > 1
- changed_when: no
-
-- name: Upgrade Docker
- command: "{{ ansible_pkg_mgr}} update -y docker"
- when: pkg_check.rc == 0 and g_docker_version.curr_version | version_compare('1.9','<')
- register: docker_upgrade
-
-- name: Restart Docker
- command: systemctl restart docker
- when: docker_upgrade | changed
diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml
deleted file mode 100644
index a911f12be..000000000
--- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/node_upgrade.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-- name: Prepare for Node evacuation
- command: >
- {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=false
- delegate_to: "{{ groups.oo_first_master.0 }}"
-
-- name: Evacuate Node for Kubelet upgrade
- command: >
- {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --evacuate --force
- delegate_to: "{{ groups.oo_first_master.0 }}"
-
-- include: rpm_upgrade.yml
- vars:
- component: "node"
- openshift_version: "{{ openshift_pkg_version | default('') }}"
- when: not openshift.common.is_containerized | bool
-
-- include: containerized_upgrade.yml
- when: openshift.common.is_containerized | bool
-
-- name: Set node schedulability
- command: >
- {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=true
- delegate_to: "{{ groups.oo_first_master.0 }}"
- when: openshift.node.schedulable | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml
index ec07f0a60..55ede13f0 100644
--- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml
+++ b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/pre.yml
@@ -3,7 +3,7 @@
# Evaluate host groups and gather facts
###############################################################################
-- include: ../../common/openshift-cluster/initialize_facts.yml
+- include: ../../initialize_facts.yml
- name: Update repos
hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config
diff --git a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml
index 66f6f8e71..8eeb652a7 100644
--- a/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml
@@ -3,19 +3,6 @@
# The restart playbook should be run after this playbook completes.
###############################################################################
-- name: Upgrade docker
- hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
- roles:
- - openshift_facts
- tasks:
- - include: docker_upgrade.yml
- when: not openshift.common.is_atomic | bool
- - name: Set post docker install facts
- openshift_facts:
- role: "{{ item.role }}"
- with_items:
- - role: docker
-
###############################################################################
# Upgrade Masters
###############################################################################
@@ -68,36 +55,51 @@
###############################################################################
# Upgrade Nodes
###############################################################################
-- name: Upgrade nodes
- hosts: oo_nodes_to_config
+
+# Here we handle all tasks that might require a node evac. (upgrading docker, and the node service)
+- name: Perform upgrades that may require node evacuation
+ hosts: oo_masters_to_config:oo_etcd_to_config:oo_nodes_to_config
serial: 1
+ any_errors_fatal: true
roles:
- openshift_facts
handlers:
- include: ../../../../../roles/openshift_node/handlers/main.yml
tasks:
- - include: node_upgrade.yml
+ # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
+ # or docker actually needs an upgrade before proceeding.
+ - name: Mark unschedulable if host is a node
+ command: >
+ {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=false
+ delegate_to: "{{ groups.oo_first_master.0 }}"
+ when: inventory_hostname in groups.oo_nodes_to_config
- - set_fact:
- node_update_complete: True
+ - name: Evacuate Node for Kubelet upgrade
+ command: >
+ {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --evacuate --force
+ delegate_to: "{{ groups.oo_first_master.0 }}"
+ when: inventory_hostname in groups.oo_nodes_to_config
+
+ - include: ../docker/upgrade_check.yml
+
+ - include: ../docker/upgrade.yml
+ when: docker_upgrade is defined and docker_upgrade | bool
+
+ - include: rpm_upgrade.yml
+ vars:
+ component: "node"
+ openshift_version: "{{ openshift_pkg_version | default('') }}"
+ when: inventory_hostname in groups.oo_nodes_to_config and not openshift.common.is_containerized | bool
+
+ - include: containerized_node_upgrade.yml
+ when: inventory_hostname in groups.oo_nodes_to_config and openshift.common.is_containerized | bool
+
+ - name: Set node schedulability
+ command: >
+ {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=true
+ delegate_to: "{{ groups.oo_first_master.0 }}"
+ when: inventory_hostname in groups.oo_nodes_to_config and openshift.node.schedulable | bool
-##############################################################################
-# Gate on nodes update
-##############################################################################
-- name: Gate on nodes update
- hosts: localhost
- connection: local
- become: no
- tasks:
- - set_fact:
- node_update_completed: "{{ hostvars
- | oo_select_keys(groups.oo_nodes_to_config)
- | oo_collect('inventory_hostname', {'node_update_complete': true}) }}"
- - set_fact:
- node_update_failed: "{{ groups.oo_nodes_to_config | difference(node_update_completed) }}"
- - fail:
- msg: "Upgrade cannot continue. The following nodes did not finish updating: {{ node_update_failed | join(',') }}"
- when: node_update_failed | length > 0
###############################################################################
# Reconcile Cluster Roles, Cluster Role Bindings and Security Context Constraints