diff options
| author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2017-09-23 12:31:03 -0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-09-23 12:31:03 -0700 | 
| commit | e0504eec9738b69c4c82b693aa661804a8b20c26 (patch) | |
| tree | 56f76651e457f8e3e5dfe22ff8a9cf5e460b7401 /roles/etcd/tasks | |
| parent | 988d9a27e88f7b14282f9e2d5882fa045e7d879c (diff) | |
| parent | f8664e17ef5e6bead61d8471facd2859fd10c180 (diff) | |
Merge pull request #5460 from ingvagabund/consolidate-etcd-migrate-role
Automatic merge from submit-queue
Consolidate etcd migrate role
The PR is based on top of https://github.com/openshift/openshift-ansible/pull/5371 and https://github.com/openshift/openshift-ansible/pull/5451. Once both PRs are merged, I will rebase.
Diffstat (limited to 'roles/etcd/tasks')
| -rw-r--r-- | roles/etcd/tasks/auxiliary/clean_data.yml | 5 | ||||
| -rw-r--r-- | roles/etcd/tasks/clean_data.yml | 2 | ||||
| -rw-r--r-- | roles/etcd/tasks/migrate.add_ttls.yml | 2 | ||||
| -rw-r--r-- | roles/etcd/tasks/migrate.configure_master.yml | 2 | ||||
| -rw-r--r-- | roles/etcd/tasks/migrate.pre_check.yml | 2 | ||||
| -rw-r--r-- | roles/etcd/tasks/migrate.yml | 2 | ||||
| -rw-r--r-- | roles/etcd/tasks/migration/add_ttls.yml | 34 | ||||
| -rw-r--r-- | roles/etcd/tasks/migration/check.yml | 56 | ||||
| -rw-r--r-- | roles/etcd/tasks/migration/check_cluster_health.yml | 23 | ||||
| -rw-r--r-- | roles/etcd/tasks/migration/check_cluster_status.yml | 32 | ||||
| -rw-r--r-- | roles/etcd/tasks/migration/configure_master.yml | 13 | ||||
| -rw-r--r-- | roles/etcd/tasks/migration/migrate.yml | 56 | 
12 files changed, 229 insertions, 0 deletions
| diff --git a/roles/etcd/tasks/auxiliary/clean_data.yml b/roles/etcd/tasks/auxiliary/clean_data.yml new file mode 100644 index 000000000..95a0e7c0a --- /dev/null +++ b/roles/etcd/tasks/auxiliary/clean_data.yml @@ -0,0 +1,5 @@ +--- +- name: Remove member data +  file: +    path: /var/lib/etcd/member +    state: absent diff --git a/roles/etcd/tasks/clean_data.yml b/roles/etcd/tasks/clean_data.yml new file mode 100644 index 000000000..d131ffd21 --- /dev/null +++ b/roles/etcd/tasks/clean_data.yml @@ -0,0 +1,2 @@ +--- +- include: auxiliary/clean_data.yml diff --git a/roles/etcd/tasks/migrate.add_ttls.yml b/roles/etcd/tasks/migrate.add_ttls.yml new file mode 100644 index 000000000..bc27e4ea1 --- /dev/null +++ b/roles/etcd/tasks/migrate.add_ttls.yml @@ -0,0 +1,2 @@ +--- +- include: migration/add_ttls.yml diff --git a/roles/etcd/tasks/migrate.configure_master.yml b/roles/etcd/tasks/migrate.configure_master.yml new file mode 100644 index 000000000..3ada6e362 --- /dev/null +++ b/roles/etcd/tasks/migrate.configure_master.yml @@ -0,0 +1,2 @@ +--- +- include: migration/configure_master.yml diff --git a/roles/etcd/tasks/migrate.pre_check.yml b/roles/etcd/tasks/migrate.pre_check.yml new file mode 100644 index 000000000..124d21561 --- /dev/null +++ b/roles/etcd/tasks/migrate.pre_check.yml @@ -0,0 +1,2 @@ +--- +- include: migration/check.yml diff --git a/roles/etcd/tasks/migrate.yml b/roles/etcd/tasks/migrate.yml new file mode 100644 index 000000000..5d5385873 --- /dev/null +++ b/roles/etcd/tasks/migrate.yml @@ -0,0 +1,2 @@ +--- +- include: migration/migrate.yml diff --git a/roles/etcd/tasks/migration/add_ttls.yml b/roles/etcd/tasks/migration/add_ttls.yml new file mode 100644 index 000000000..14625e49e --- /dev/null +++ b/roles/etcd/tasks/migration/add_ttls.yml @@ -0,0 +1,34 @@ +--- +# To be executed on first master +- slurp: +    src: "{{ openshift.common.config_base }}/master/master-config.yaml" +  register: g_master_config_output + +- set_fact: +    accessTokenMaxAgeSeconds: 
"{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.accessTokenMaxAgeSeconds | default(86400) }}" +    authroizeTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.authroizeTokenMaxAgeSeconds | default(500) }}" +    controllerLeaseTTL: "{{ (g_master_config_output.content|b64decode|from_yaml).controllerLeaseTTL | default(30) }}" + +- name: Re-introduce leases (as a replacement for key TTLs) +  command: > +    oadm migrate etcd-ttl \ +    --cert {{ r_etcd_common_master_peer_cert_file }} \ +    --key {{ r_etcd_common_master_peer_key_file }} \ +    --cacert {{ r_etcd_common_master_peer_ca_file }} \ +    --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \ +    --ttl-keys-prefix {{ item.keys }} \ +    --lease-duration {{ item.ttl }} +  environment: +    ETCDCTL_API: 3 +    PATH: "/usr/local/bin:/var/usrlocal/bin:{{ ansible_env.PATH }}" +  with_items: +    - keys: "/kubernetes.io/events" +      ttl: "1h" +    - keys: "/kubernetes.io/masterleases" +      ttl: "10s" +    - keys: "/openshift.io/oauth/accesstokens" +      ttl: "{{ accessTokenMaxAgeSeconds }}s" +    - keys: "/openshift.io/oauth/authorizetokens" +      ttl: "{{ authroizeTokenMaxAgeSeconds }}s" +    - keys: "/openshift.io/leases/controllers" +      ttl: "{{ controllerLeaseTTL }}s" diff --git a/roles/etcd/tasks/migration/check.yml b/roles/etcd/tasks/migration/check.yml new file mode 100644 index 000000000..0804d9e1c --- /dev/null +++ b/roles/etcd/tasks/migration/check.yml @@ -0,0 +1,56 @@ +--- + +# Check the cluster is healthy +- include: check_cluster_health.yml + +# Check if the member has v3 data already +# Run the migration only if the data are v2 +- name: Check if there are any v3 data +  command: > +    etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' get "" --from-key --keys-only -w json --limit 1 +  
environment: +    ETCDCTL_API: 3 +  register: l_etcdctl_output + +- fail: +    msg: "Unable to get a number of v3 keys" +  when: l_etcdctl_output.rc != 0 + +- fail: +    msg: "The etcd has at least one v3 key" +  when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0" + + +# TODO(jchaloup): once the until loop can be used over include/block, +#                 remove the repetitive code +# - until loop not supported over include statement (nor block) +#   https://github.com/ansible/ansible/issues/17098 +# - with_items not supported over block + +# Check the cluster status for the first time +- include: check_cluster_status.yml + +# Check the cluster status for the second time +- block: +  - debug: +      msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" +  - name: Wait a while before another check +    pause: +      seconds: 5 +    when: not l_etcd_cluster_status_ok | bool + +  - include: check_cluster_status.yml +    when: not l_etcd_cluster_status_ok | bool + + +# Check the cluster status for the third time +- block: +  - debug: +      msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" +  - name: Wait a while before another check +    pause: +      seconds: 5 +    when: not l_etcd_cluster_status_ok | bool + +  - include: check_cluster_status.yml +    when: not l_etcd_cluster_status_ok | bool diff --git a/roles/etcd/tasks/migration/check_cluster_health.yml b/roles/etcd/tasks/migration/check_cluster_health.yml new file mode 100644 index 000000000..201d83f99 --- /dev/null +++ b/roles/etcd/tasks/migration/check_cluster_health.yml @@ -0,0 +1,23 @@ +--- +- name: Check cluster health +  command: > +    etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health +  register: etcd_cluster_health +  changed_when: false +  failed_when: false + +- name: Assume a member 
is not healthy +  set_fact: +    etcd_member_healthy: false + +- name: Get member item health status +  set_fact: +    etcd_member_healthy: true +  with_items: "{{ etcd_cluster_health.stdout_lines }}" +  when: "(etcd_peer in item) and ('is healthy' in item)" + +- name: Check the etcd cluster health +  # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy? +  fail: +    msg: "Etcd member {{ etcd_peer }} is not healthy" +  when: not etcd_member_healthy diff --git a/roles/etcd/tasks/migration/check_cluster_status.yml b/roles/etcd/tasks/migration/check_cluster_status.yml new file mode 100644 index 000000000..b69fb5a52 --- /dev/null +++ b/roles/etcd/tasks/migration/check_cluster_status.yml @@ -0,0 +1,32 @@ +--- +# etcd_ip originates from etcd_common role +- name: Check cluster status +  command: > +    etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' -w json endpoint status +  environment: +    ETCDCTL_API: 3 +  register: l_etcd_cluster_status + +- name: Retrieve raftIndex +  set_fact: +    etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}" + +- block: +  # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers +  - name: Group all raftIndices into a list +    set_fact: +      etcd_members_raft_indices: "{{ groups['oo_etcd_to_migrate'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" + +  - name: Check the minimum and the maximum of raftIndices is at most 1 +    set_fact: +      etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}" + +  - debug: +      msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" + +  when: inventory_hostname in groups.oo_etcd_to_migrate[0] + +# The cluster raft status is ok if 
the difference of the max and min raft index is at most 1 +- name: capture the status +  set_fact: +    l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_migrate[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd/tasks/migration/configure_master.yml b/roles/etcd/tasks/migration/configure_master.yml new file mode 100644 index 000000000..a305d5bf3 --- /dev/null +++ b/roles/etcd/tasks/migration/configure_master.yml @@ -0,0 +1,13 @@ +--- +- name: Configure master to use etcd3 storage backend +  yedit: +    src: /etc/origin/master/master-config.yaml +    key: "{{ item.key }}" +    value: "{{ item.value }}" +  with_items: +    - key: kubernetesMasterConfig.apiServerArguments.storage-backend +      value: +        - etcd3 +    - key: kubernetesMasterConfig.apiServerArguments.storage-media-type +      value: +        - application/vnd.kubernetes.protobuf diff --git a/roles/etcd/tasks/migration/migrate.yml b/roles/etcd/tasks/migration/migrate.yml new file mode 100644 index 000000000..54a9c74ff --- /dev/null +++ b/roles/etcd/tasks/migration/migrate.yml @@ -0,0 +1,56 @@ +--- +# Should this be run in a serial manner? 
+- set_fact: +    l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" + +- name: Migrate etcd data +  command: > +    etcdctl migrate --data-dir={{ etcd_data_dir }} +  environment: +    ETCDCTL_API: 3 +  register: l_etcdctl_migrate +# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup +- name: Check the etcd v2 data are correctly migrated +  fail: +    msg: "Failed to migrate a member" +  when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout" +- name: Migration message +  debug: +    msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}" +- name: Set ETCD_FORCE_NEW_CLUSTER=true on first etcd host +  lineinfile: +    line: "ETCD_FORCE_NEW_CLUSTER=true" +    dest: /etc/etcd/etcd.conf +    backup: true +- name: Start etcd +  systemd: +    name: "{{ l_etcd_service }}" +    state: started +- name: Wait for cluster to become healthy after bringing up first member +  command: > +    etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health +  register: l_etcd_migrate_health +  until: l_etcd_migrate_health.rc == 0 +  retries: 3 +  delay: 30 +- name: Unset ETCD_FORCE_NEW_CLUSTER=true on first etcd host +  lineinfile: +    line: "ETCD_FORCE_NEW_CLUSTER=true" +    dest: /etc/etcd/etcd.conf +    state: absent +    backup: true +- name: Restart first etcd host +  systemd: +    name: "{{ l_etcd_service }}" +    state: restarted + +- name: Wait for cluster to become healthy after bringing up first member +  command: > +    etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health +  register: l_etcd_migrate_health +  until: 
l_etcd_migrate_health.rc == 0 +  retries: 3 +  delay: 30 + +- set_fact: +    r_etcd_migrate_success: true | 
