From 483e58decb4daae9c355a5be8f090d1778133143 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Wed, 5 Apr 2017 15:43:39 -0400 Subject: Copy v3 data dir when performing backup Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1433272 --- .../common/openshift-cluster/upgrades/etcd/backup.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index 7ef79afa9..347621a14 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -3,7 +3,6 @@ hosts: etcd_hosts_to_backup vars: embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" etcdctl_command: "{{ 'etcdctl' if not openshift.common.is_containerized or embedded_etcd else 'docker exec etcd_container etcdctl' if not openshift.common.is_etcd_system_container else 'runc exec etcd etcdctl' }}" roles: - openshift_facts @@ -13,6 +12,8 @@ role: etcd local_facts: {} when: "'etcd' not in openshift" + - set_fact: + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - stat: path=/var/lib/openshift register: var_lib_openshift @@ -77,6 +78,19 @@ {{ etcdctl_command }} backup --data-dir={{ openshift.etcd.etcd_data_dir }} --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + # According to the docs change you can simply copy snap/db + # https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6 + - name: Check for v3 data store + stat: + path: "{{ openshift.etcd.etcd_data_dir }}/member/snap/db" + register: v3_db + + - name: Copy etcd v3 data store + command: > + cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap + {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}/member/ + when: v3_db.stat.exists 
+ - set_fact: etcd_backup_complete: True -- cgit v1.2.3 From da3c31cae2870a66a0524ae06c37a3fb44e1b312 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Fri, 7 Apr 2017 09:13:43 -0400 Subject: Create member/snap directory in case it doesn't exist Fixes TASK [Copy etcd v3 data store] ************************************************* fatal: [host.redhat.com]: FAILED! => { "changed": true, "cmd": [ "cp", "-a", "/var/lib/etcd//member/snap", "/var/lib/origin/etcd-backup-pre-upgrade-20170407055413/member/" ], "delta": "0:00:00.003152", "end": "2017-04-07 01:54:17.584685", "failed": true, "rc": 1, "start": "2017-04-07 01:54:17.581533", "warnings": [] } STDERR: cp: cannot create directory '/var/lib/origin/etcd-backup-pre-upgrade-20170407055413/member/': No such file or directory --- playbooks/common/openshift-cluster/upgrades/etcd/backup.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index 347621a14..31a6c6c66 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -85,10 +85,15 @@ path: "{{ openshift.etcd.etcd_data_dir }}/member/snap/db" register: v3_db + - name: Ensure v3 backup directory exists + file: + path: "{{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}/member/snap" + state: directory + - name: Copy etcd v3 data store command: > - cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap - {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}/member/ + cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap/db + {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}/member/snap/ when: v3_db.stat.exists - set_fact: -- cgit v1.2.3 From 1ce31d156ef800da73544622558da9aa0fc647e4 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 
10 Apr 2017 13:13:32 -0400 Subject: Store backups in /var/lib/etcd/openshift-backup Because containerized installs don't mount /var/lib/origin and we switched to running the backup inside the container that meant that we were backing up the etcd data into a directory inside the container filesystem. Since we have no other volume mounted we need to backup into /var/lib/etcd. --- .../openshift-cluster/upgrades/etcd/backup.yml | 31 +++++++++------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index 31a6c6c66..6c763ce46 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -4,6 +4,7 @@ vars: embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" etcdctl_command: "{{ 'etcdctl' if not openshift.common.is_containerized or embedded_etcd else 'docker exec etcd_container etcdctl' if not openshift.common.is_etcd_system_container else 'runc exec etcd etcdctl' }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" roles: - openshift_facts tasks: @@ -13,30 +14,20 @@ local_facts: {} when: "'etcd' not in openshift" - set_fact: - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - - - stat: path=/var/lib/openshift - register: var_lib_openshift - - - stat: path=/var/lib/origin - register: var_lib_origin - - - name: Create origin symlink if necessary - file: src=/var/lib/openshift/ dest=/var/lib/origin state=link - when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + etcd_backup_dir: "{{ openshift.etcd.etcd_data_dir }}/openshift-backup-{{ backup_tag | default('') }}{{ timestamp }}" # TODO: replace shell module with command and update later checks # We assume to be using the data dir for all backups. 
- name: Check available disk space for etcd backup - shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 + shell: df --output=avail -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 register: avail_disk # AUDIT:changed_when: `false` because we are only inspecting # state, not manipulating anything changed_when: false # TODO: replace shell module with command and update later checks - - name: Check current embedded etcd disk usage - shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 + - name: Check current etcd disk usage + shell: du --exclude='*openshift-backup*' -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 register: etcd_disk_usage when: embedded_etcd | bool # AUDIT:changed_when: `false` because we are only inspecting @@ -71,12 +62,12 @@ package: name: etcd state: latest - when: ( embedded_etcd | bool or openshift.common.is_containerized ) and not openshift.common.is_atomic + when: embedded_etcd | bool - name: Generate etcd backup command: > {{ etcdctl_command }} backup --data-dir={{ openshift.etcd.etcd_data_dir }} - --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + --backup-dir={{ etcd_backup_dir }} # According to the docs change you can simply copy snap/db # https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6 @@ -85,15 +76,17 @@ path: "{{ openshift.etcd.etcd_data_dir }}/member/snap/db" register: v3_db + # TODO: this should be safe to remove now that we've figured out that we were dumping the data + # inside the container - name: Ensure v3 backup directory exists file: - path: "{{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}/member/snap" + path: "{{ etcd_backup_dir }}/member/snap" state: directory - name: Copy etcd v3 data store command: > cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap/db - {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ 
timestamp }}/member/snap/ + {{ etcd_backup_dir }}/member/snap/ when: v3_db.stat.exists - set_fact: @@ -101,7 +94,7 @@ - name: Display location of etcd backup debug: - msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" + msg: "Etcd backup created in {{ etcd_backup_dir }}" - name: Gate on etcd backup hosts: localhost -- cgit v1.2.3 From 974f01c81d11027a7d944fc547df4ab557e99e58 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 10 Apr 2017 13:27:57 -0400 Subject: Cleanup comments and remove extraneous tasks --- .../openshift-cluster/upgrades/etcd/backup.yml | 30 +++++----------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index 6c763ce46..fb51a0061 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -17,7 +17,6 @@ etcd_backup_dir: "{{ openshift.etcd.etcd_data_dir }}/openshift-backup-{{ backup_tag | default('') }}{{ timestamp }}" # TODO: replace shell module with command and update later checks - # We assume to be using the data dir for all backups. - name: Check available disk space for etcd backup shell: df --output=avail -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 register: avail_disk @@ -44,25 +43,17 @@ # For non containerized and non embedded we should have the correct version of # etcd installed already. So don't do anything. # - # For embedded or containerized we need to use the latest because OCP 3.3 uses - # a version of etcd that can only be backed up with etcd-3.x and if it's - # containerized then etcd version may be newer than that on the host so - # upgrade it. 
+ # For containerized installs we now exec into etcd_container # - # On atomic we have neither yum nor dnf so ansible throws a hard to debug error - # if you use package there, like this: "Could not find a module for unknown." - # see https://bugzilla.redhat.com/show_bug.cgi?id=1408668 - # - # TODO - We should refactor all containerized backups to use the containerized - # version of etcd to perform the backup rather than relying on the host's - # binaries. Until we do that we'll continue to have problems backing up etcd - # when atomic host has an older version than the version that's running in the - # container whether that's embedded or not - - name: Install latest etcd for containerized or embedded + # For embedded non containerized we need to ensure we have the latest version + # etcd on the host. + - name: Install latest etcd for embedded package: name: etcd state: latest - when: embedded_etcd | bool + when: + - embedded_etcd | bool + - not openshift.common.is_atomic | bool - name: Generate etcd backup command: > @@ -76,13 +67,6 @@ path: "{{ openshift.etcd.etcd_data_dir }}/member/snap/db" register: v3_db - # TODO: this should be safe to remove now that we've figured out that we were dumping the data - # inside the container - - name: Ensure v3 backup directory exists - file: - path: "{{ etcd_backup_dir }}/member/snap" - state: directory - - name: Copy etcd v3 data store command: > cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap/db -- cgit v1.2.3