15 files changed, 109 insertions, 231 deletions
diff --git a/playbooks/common/openshift-checks/health.yml b/playbooks/common/openshift-checks/health.yml
new file mode 100644
index 000000000..fc0f523d5
--- /dev/null
+++ b/playbooks/common/openshift-checks/health.yml
@@ -0,0 +1,10 @@
+---
+- name: Run OpenShift health checks  # play: runs the '@health' check selection on hosts matched by OSEv3
+  hosts: OSEv3
+  roles:
+    - openshift_health_checker  # NOTE(review): presumably provides the openshift_health_check action used below; confirm
+  post_tasks:
+    - action: openshift_health_check  # https://github.com/ansible/ansible/issues/20513
+      args:
+        checks:
+          - '@health'  # quoted because '@' cannot start a plain YAML scalar; presumably selects a tagged group of checks (TODO confirm)
diff --git a/playbooks/common/openshift-checks/pre-install.yml b/playbooks/common/openshift-checks/pre-install.yml
new file mode 100644
index 000000000..c8ffc3d91
--- /dev/null
+++ b/playbooks/common/openshift-checks/pre-install.yml
@@ -0,0 +1,10 @@
+---
+- name: run OpenShift pre-install checks  # play: runs the '@preflight' check selection on hosts matched by OSEv3
+  hosts: OSEv3
+  roles:
+    - role: openshift_health_checker
+  post_tasks:
+    - action: openshift_health_check  # https://github.com/ansible/ansible/issues/20513
+      args:
+        checks:
+          - '@preflight'  # quoted because '@' cannot start a plain YAML scalar
diff --git a/playbooks/common/openshift-checks/roles b/playbooks/common/openshift-checks/roles
new file mode 120000
index 000000000..20c4c58cf
--- /dev/null
+++ b/playbooks/common/openshift-checks/roles
@@ -0,0 +1 @@
+../../../roles
\ No newline at end of file
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml b/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml
index 4fa7f9cdf..0d0ff798c 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml
@@ -9,7 +9,8 @@
 - name: Backup existing etcd CA certificate directories
   hosts: oo_etcd_to_config
   roles:
-  - etcd_common
+  - role: etcd_common
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
   tasks:
   - name: Determine if CA certificate directory exists
     stat:
@@ -52,7 +53,8 @@
   vars:
     etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}"
   roles:
-  - etcd_common
+  - role: etcd_common
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
   tasks:
   - name: Create a tarball of the etcd ca certs
     command: >
@@ -98,7 +100,8 @@
 - name: Retrieve etcd CA certificate
   hosts: oo_first_etcd
   roles:
-  - etcd_common
+  - role: etcd_common
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
   tasks:
   - name: Retrieve etcd CA certificate
     fetch:
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml b/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml
index 2963a5940..6b5c805e6 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml
@@ -3,7 +3,8 @@
   hosts: oo_first_etcd
   any_errors_fatal: true
   roles:
-    - etcd_common
+    - role: etcd_common
+      r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
   post_tasks:
     - name: Determine if generated etcd certificates exist
       stat:
@@ -27,7 +28,8 @@
   hosts: oo_etcd_to_config
   any_errors_fatal: true
   roles:
-    - etcd_common
+    - role: etcd_common
+      r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
   post_tasks:
     - name: Backup etcd certificates
       command: >
@@ -50,6 +52,7 @@
       etcd_peers: "{{ groups.oo_etcd_to_config | default([], true) }}"
       etcd_certificates_etcd_hosts: "{{ groups.oo_etcd_to_config | default([], true) }}"
       openshift_ca_host: "{{ groups.oo_first_master.0 }}"
+      r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
 
 - name: Redeploy etcd client certificates for masters
   hosts: oo_masters_to_config
@@ -63,4 +66,5 @@
       etcd_cert_prefix: "master.etcd-"
       openshift_ca_host: "{{ groups.oo_first_master.0 }}"
       openshift_master_count: "{{ openshift.master.master_count | default(groups.oo_masters | length) }}"
+      r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
       when: groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml
index 9d0333ca8..b7fd2c0c5 100644
--- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml
@@ -1,84 +1,14 @@
 ---
 - name: Backup etcd
   hosts: oo_etcd_hosts_to_backup
-  vars:
-    embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
-    etcdctl_command: "{{ 'etcdctl' if not openshift.common.is_containerized or embedded_etcd else 'docker exec etcd_container etcdctl' if not openshift.common.is_etcd_system_container else 'runc exec etcd etcdctl' }}"
-    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
   roles:
-  - openshift_facts
-  tasks:
-  # Ensure we persist the etcd role for this host in openshift_facts
-  - openshift_facts:
-      role: etcd
-      local_facts: {}
-    when: "'etcd' not in openshift"
-  - set_fact:
-      etcd_backup_dir: "{{ openshift.etcd.etcd_data_dir }}/openshift-backup-{{ backup_tag | default('') }}{{ timestamp }}"
-
-  # TODO: replace shell module with command and update later checks
-  - name: Check available disk space for etcd backup
-    shell: df --output=avail -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1
-    register: avail_disk
-    # AUDIT:changed_when: `false` because we are only inspecting
-    # state, not manipulating anything
-    changed_when: false
-
-  # TODO: replace shell module with command and update later checks
-  - name: Check current etcd disk usage
-    shell: du --exclude='*openshift-backup*' -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
-    register: etcd_disk_usage
-    when: embedded_etcd | bool
-    # AUDIT:changed_when: `false` because we are only inspecting
-    # state, not manipulating anything
-    changed_when: false
-
-  - name: Abort if insufficient disk space for etcd backup
-    fail:
-      msg: >
-        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
-        {{ avail_disk.stdout }} Kb available.
-    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
-
-  # For non containerized and non embedded we should have the correct version of
-  # etcd installed already. So don't do anything.
-  #
-  # For containerized installs we now exec into etcd_container
-  #
-  # For embedded non containerized we need to ensure we have the latest version
-  # etcd on the host.
-  - name: Install latest etcd for embedded
-    package:
-      name: etcd
-      state: latest
-    when:
-    - embedded_etcd | bool
-    - not openshift.common.is_atomic | bool
-
-  - name: Generate etcd backup
-    command: >
-      {{ etcdctl_command }} backup --data-dir={{ openshift.etcd.etcd_data_dir }}
-      --backup-dir={{ etcd_backup_dir }}
-
-  # According to the docs change you can simply copy snap/db
-  # https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6
-  - name: Check for v3 data store
-    stat:
-      path: "{{ openshift.etcd.etcd_data_dir }}/member/snap/db"
-    register: v3_db
-
-  - name: Copy etcd v3 data store
-    command: >
-      cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap/db
-      {{ etcd_backup_dir }}/member/snap/
-    when: v3_db.stat.exists
-
-  - set_fact:
-      etcd_backup_complete: True
-
-  - name: Display location of etcd backup
-    debug:
-      msg: "Etcd backup created in {{ etcd_backup_dir }}"
+  - role: openshift_facts
+  - role: etcd_upgrade  # the inline backup tasks removed above are replaced by this role invocation
+    r_etcd_upgrade_action: backup
+    r_etcd_backup_tag: "{{ etcd_backup_tag | default('') }}"  # template the caller's tag (was the literal string 'etcd_backup_tag'); empty when the includer sets none
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
+    r_etcd_upgrade_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"  # true when no hosts are in oo_etcd_to_config
+    r_etcd_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"  # timestamp from `date` via the pipe lookup; NOTE(review): 'sufix' spelling presumably matches the role's variable, confirm before renaming
 
 - name: Gate on etcd backup
   hosts: localhost
@@ -88,7 +18,7 @@
   - set_fact:
       etcd_backup_completed: "{{ hostvars
                                  | oo_select_keys(groups.oo_etcd_hosts_to_backup)
-                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
+                                 | oo_collect('inventory_hostname', {'r_etcd_upgrade_backup_complete': true}) }}"
   - set_fact:
       etcd_backup_failed: "{{ groups.oo_etcd_hosts_to_backup | difference(etcd_backup_completed) }}"
   - fail:
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml
deleted file mode 100644
index 5f8b59e17..000000000
--- a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml
+++ /dev/null
@@ -1,46 +0,0 @@
----
-- name: Verify cluster is healthy pre-upgrade
-  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
-
-- name: Get current image
-  shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}'
-  register: current_image
-
-- name: Set new_etcd_image
-  set_fact:
-    new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd:' ~ upgrade_version ) }}"
-
-- name: Pull new etcd image
-  command: "docker pull {{ new_etcd_image }}"
-
-- name: Update to latest etcd image
-  replace:
-    dest: /etc/systemd/system/etcd_container.service
-    regexp: "{{ current_image.stdout }}$"
-    replace: "{{ new_etcd_image }}"
-
-- name: Restart etcd_container
-  systemd:
-    name: etcd_container
-    daemon_reload: yes
-    state: restarted
-
-## TODO: probably should just move this into the backup playbooks, also this
-## will fail on atomic host. We need to revisit how to do etcd backups there as
-## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1)
-- name: Upgrade etcd for etcdctl when not atomic
-  package: name=etcd state=latest
-  when: not openshift.common.is_atomic | bool
-
-- name: Verify cluster is healthy
-  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
-  register: etcdctl
-  until: etcdctl.rc == 0
-  retries: 3
-  delay: 10
-
-- name: Store new etcd_image
-  openshift_facts:
-    role: etcd
-    local_facts:
-      etcd_image: "{{ new_etcd_image }}"
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml
deleted file mode 100644
index 30232110e..000000000
--- a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml
+++ /dev/null
@@ -1,23 +0,0 @@
----
-# F23 GA'd with etcd 2.0, currently has 2.2 in updates
-# F24 GA'd with etcd-2.2, currently has 2.2 in updates
-# F25 Beta currently has etcd 3.0
-- name: Verify cluster is healthy pre-upgrade
-  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
-
-- name: Update etcd
-  package:
-    name: "etcd"
-    state: "latest"
-
-- name: Restart etcd
-  service:
-    name: etcd
-    state: restarted
-
-- name: Verify cluster is healthy
-  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
-  register: etcdctl
-  until: etcdctl.rc == 0
-  retries: 3
-  delay: 10
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml
index d9b59edcb..3e01883ae 100644
--- a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml
@@ -8,7 +8,7 @@
 - name: Backup etcd before upgrading anything
   include: backup.yml
   vars:
-    backup_tag: "pre-upgrade-"
+    etcd_backup_tag: "pre-upgrade-"  # renamed from backup_tag; backup.yml refers to etcd_backup_tag when setting the role's r_etcd_backup_tag
   when: openshift_etcd_backup | default(true) | bool
 
 - name: Drop etcdctl profiles
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml
deleted file mode 100644
index 3a972e8ab..000000000
--- a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml
+++ /dev/null
@@ -1,20 +0,0 @@
----
-- name: Verify cluster is healthy pre-upgrade
-  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
-
-- name: Update etcd RPM
-  package:
-    name: etcd-{{ upgrade_version }}*
-    state: latest
-
-- name: Restart etcd
-  service:
-    name: etcd
-    state: restarted
-
-- name: Verify cluster is healthy
-  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
-  register: etcdctl
-  until: etcdctl.rc == 0
-  retries: 3
-  delay: 10
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml
index 54f9e21a1..0431c1ce0 100644
--- a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml
@@ -12,10 +12,10 @@
       # AUDIT:changed_when: `false` because we are only inspecting
       # state, not manipulating anything
       changed_when: false
-
     - debug:
         msg: "Etcd rpm version {{ etcd_rpm_version.stdout }} detected"
-    when: not openshift.common.is_containerized | bool
+    when:
+    - not openshift.common.is_containerized | bool
 
   - block:
     - name: Record containerized etcd version (docker)
@@ -54,84 +54,57 @@
 
     - debug:
         msg: "Etcd containerized version {{ etcd_container_version }} detected"
-
     when:
     - openshift.common.is_containerized | bool
 
-# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop
-# through hosts, then loop through tasks only when appropriate
-- name: Upgrade to 2.1
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_rpm_members.yml  # rpm step: applies only to non-containerized RedHat hosts whose etcd rpm is older than etcd_upgrade_version
   vars:
-    upgrade_version: '2.1'
-  tasks:
-  - include: rhel_tasks.yml
-    when: etcd_rpm_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+    etcd_upgrade_version: '2.1'
 
-- name: Upgrade RPM hosts to 2.2
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_rpm_members.yml
   vars:
-    upgrade_version: '2.2'
-  tasks:
-  - include: rhel_tasks.yml
-    when: etcd_rpm_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+    etcd_upgrade_version: '2.2'
 
-- name: Upgrade containerized hosts to 2.2.5
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_image_members.yml  # image step: applies only to containerized hosts whose etcd container version is older than etcd_upgrade_version
   vars:
-    upgrade_version: 2.2.5
-  tasks:
-  - include: containerized_tasks.yml
-    when: etcd_container_version | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool
+    etcd_upgrade_version: '2.2.5'  # the skip check now compares against this full version (the removed play compared against '2.2')
 
-- name: Upgrade RPM hosts to 2.3
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_rpm_members.yml
   vars:
-    upgrade_version: '2.3'
-  tasks:
-  - include: rhel_tasks.yml
-    when: etcd_rpm_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+    etcd_upgrade_version: '2.3'
 
-- name: Upgrade containerized hosts to 2.3.7
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_image_members.yml
   vars:
-    upgrade_version: 2.3.7
-  tasks:
-  - include: containerized_tasks.yml
-    when: etcd_container_version | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool
+    etcd_upgrade_version: '2.3.7'
 
-- name: Upgrade RPM hosts to 3.0
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_rpm_members.yml
   vars:
-    upgrade_version: '3.0'
-  tasks:
-  - include: rhel_tasks.yml
-    when: etcd_rpm_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+    etcd_upgrade_version: '3.0'
 
-- name: Upgrade containerized hosts to etcd3 image
-  hosts: oo_etcd_hosts_to_upgrade
-  serial: 1
+- include: upgrade_image_members.yml
   vars:
-    upgrade_version: 3.0.15
-  tasks:
-  - include: containerized_tasks.yml
-    when: etcd_container_version | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool
+    etcd_upgrade_version: '3.0.15'
+
+- include: upgrade_rpm_members.yml  # new step with no counterpart in the removed plays: rpm hosts continue to 3.1
+  vars:
+    etcd_upgrade_version: '3.1'
+
+- include: upgrade_image_members.yml  # new step with no counterpart in the removed plays: containerized hosts continue to 3.1.3
+  vars:
+    etcd_upgrade_version: '3.1.3'
 
 - name: Upgrade fedora to latest
   hosts: oo_etcd_hosts_to_upgrade
   serial: 1
   tasks:
-  - include: fedora_tasks.yml
-    when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool
+  - include_role:  # NOTE(review): no r_etcd_upgrade_* parameters are passed here, unlike the rpm/image member plays; confirm the role's defaults cover the Fedora path
+      name: etcd_upgrade
+    when:  # list items are ANDed: Fedora hosts that are not containerized
+    - ansible_distribution == 'Fedora'
+    - not openshift.common.is_containerized | bool
 
 - name: Backup etcd
   include: backup.yml
   vars:
-    backup_tag: "post-3.0-"
+    etcd_backup_tag: "post-3.0-"  # NOTE(review): tag still says 3.0 although the include chain in this file now goes up to 3.1 / 3.1.3; confirm this is intended
   when: openshift_etcd_backup | default(true) | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_image_members.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_image_members.yml
new file mode 100644
index 000000000..831ca8f57
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_image_members.yml
@@ -0,0 +1,17 @@
+---
+# Input: etcd_upgrade_version - version passed to the etcd_upgrade role; hosts already at or above it are skipped
+# Input: etcd_container_version - version currently running; treated as '99' (never upgraded) when undefined
+# Input: openshift.common.is_containerized - the role is applied only when this is true
+- name: "Upgrade containerized hosts to {{ etcd_upgrade_version }}"
+  serial: 1
+  hosts: oo_etcd_hosts_to_upgrade
+  roles:
+  - role: etcd_upgrade
+    r_etcd_upgrade_mechanism: image
+    r_etcd_upgrade_action: upgrade
+    r_etcd_upgrade_version: "{{ etcd_upgrade_version }}"
+    etcd_peer: "{{ openshift.common.hostname }}"
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
+    when:
+    - etcd_container_version | default('99') | version_compare(etcd_upgrade_version, '<')
+    - openshift.common.is_containerized | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_rpm_members.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_rpm_members.yml
new file mode 100644
index 000000000..2e79451e0
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_rpm_members.yml
@@ -0,0 +1,18 @@
+---
+# Input: etcd_upgrade_version - version passed to the etcd_upgrade role; hosts already at or above it are skipped
+# Input: etcd_rpm_version - registered result whose .stdout is the installed version; '99' (never upgraded) when absent
+# Input: openshift.common.is_containerized - the role is applied only when this is false
+- name: "Upgrade to {{ etcd_upgrade_version }}"
+  serial: 1
+  hosts: oo_etcd_hosts_to_upgrade
+  roles:
+  - role: etcd_upgrade
+    r_etcd_upgrade_mechanism: rpm
+    r_etcd_upgrade_action: upgrade
+    r_etcd_upgrade_version: "{{ etcd_upgrade_version }}"
+    etcd_peer: "{{ openshift.common.hostname }}"
+    r_etcd_common_etcd_runtime: 'host'
+    when:
+    - etcd_rpm_version.stdout | default('99') | version_compare(etcd_upgrade_version, '<')
+    - ansible_distribution == 'RedHat'
+    - not openshift.common.is_containerized | bool
diff --git a/playbooks/common/openshift-etcd/config.yml b/playbooks/common/openshift-etcd/config.yml
index 1b8106e0e..2cb6197d1 100644
--- a/playbooks/common/openshift-etcd/config.yml
+++ b/playbooks/common/openshift-etcd/config.yml
@@ -7,4 +7,5 @@
     etcd_peers: "{{ groups.oo_etcd_to_config | default([], true) }}"
     etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}"
     etcd_certificates_etcd_hosts: "{{ groups.oo_etcd_to_config | default([], true) }}"
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
   - role: nickhammond.logrotate
diff --git a/playbooks/common/openshift-node/restart.yml b/playbooks/common/openshift-node/restart.yml
index 441b100e9..01cf948e0 100644
--- a/playbooks/common/openshift-node/restart.yml
+++ b/playbooks/common/openshift-node/restart.yml
@@ -51,7 +51,7 @@
     register: node_output
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_config
-    until: node_output.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True
+    until: node_output.results.returncode == 0 and node_output.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True
     # Give the node two minutes to come back online.
     retries: 24
     delay: 5