diff options
Diffstat (limited to 'playbooks')
28 files changed, 324 insertions, 263 deletions
diff --git a/playbooks/adhoc/s3_registry/s3_registry.j2 b/playbooks/adhoc/s3_registry/s3_registry.j2 deleted file mode 100644 index 10454ad11..000000000 --- a/playbooks/adhoc/s3_registry/s3_registry.j2 +++ /dev/null @@ -1,23 +0,0 @@ -version: 0.1 -log: - level: debug -http: - addr: :5000 -storage: - cache: - layerinfo: inmemory - s3: - accesskey: {{ aws_access_key }} - secretkey: {{ aws_secret_key }} - region: {{ aws_bucket_region }} - bucket: {{ aws_bucket_name }} - encrypt: true - secure: true - v4auth: true - rootdirectory: /registry -auth: - openshift: - realm: openshift -middleware: - repository: - - name: openshift diff --git a/playbooks/adhoc/s3_registry/s3_registry.yml b/playbooks/adhoc/s3_registry/s3_registry.yml deleted file mode 100644 index 2c79a1b4d..000000000 --- a/playbooks/adhoc/s3_registry/s3_registry.yml +++ /dev/null @@ -1,78 +0,0 @@ ---- -# This playbook creates an S3 bucket named after your cluster and configures the docker-registry service to use the bucket as its backend storage. -# Usage: -# ansible-playbook s3_registry.yml -e clusterid="mycluster" -e aws_bucket="clusterid-docker" -e aws_region="us-east-1" -# -# The AWS access/secret keys should be the keys of a separate user (not your main user), containing only the necessary S3 access role. -# The 'clusterid' is the short name of your cluster. - -- hosts: tag_clusterid_{{ clusterid }}:&tag_host-type_openshift-master - remote_user: root - gather_facts: False - - vars: - aws_access_key: "{{ lookup('env', 'S3_ACCESS_KEY_ID') }}" - aws_secret_key: "{{ lookup('env', 'S3_SECRET_ACCESS_KEY') }}" - aws_bucket_name: "{{ aws_bucket | default(clusterid ~ '-docker') }}" - aws_bucket_region: "{{ aws_region | default(lookup('env', 'S3_REGION') | default('us-east-1', true)) }}" - aws_create_bucket: "{{ aws_create | default(True) }}" - aws_tmp_path: "{{ aws_tmp_pathfile | default('/root/config.yml')}}" - aws_delete_tmp_file: "{{ aws_delete_tmp | default(True) }}" - - tasks: - - - name: Check for AWS creds - fail: - msg: "Couldn't find {{ item }} creds in ENV" - when: "{{ item }} == ''" - with_items: - - aws_access_key - - aws_secret_key - - - name: Scale down registry - command: oc scale --replicas=0 dc/docker-registry - - - name: Create S3 bucket - when: aws_create_bucket | bool - local_action: - module: s3 bucket="{{ aws_bucket_name }}" mode=create - - - name: Set up registry environment variable - command: oc env dc/docker-registry REGISTRY_CONFIGURATION_PATH=/etc/registryconfig/config.yml - - - name: Generate docker registry config - template: src="s3_registry.j2" dest="/root/config.yml" owner=root mode=0600 - - - name: Determine if new secrets are needed - command: oc get secrets - register: secrets - - - name: Create registry secrets - command: oc secrets new dockerregistry /root/config.yml - when: "'dockerregistry' not in secrets.stdout" - - - name: Determine if service account contains secrets - command: oc describe serviceaccount/registry - register: serviceaccount - - - name: Add secrets to registry service account - command: oc secrets add serviceaccount/registry secrets/dockerregistry - when: "'dockerregistry' not in serviceaccount.stdout" - - - name: Determine if deployment config contains secrets - command: oc volume dc/docker-registry --list - register: dc - - - name: Add secrets to registry deployment config - command: oc volume dc/docker-registry --add --name=dockersecrets -m /etc/registryconfig --type=secret --secret-name=dockerregistry - when: "'dockersecrets' not in dc.stdout" - - - name: Wait for deployment config to take effect before scaling up - pause: seconds=30 - - - name: Scale up registry - command: oc scale --replicas=1 dc/docker-registry - - - name: Delete temporary config file - file: path={{ aws_tmp_path }} state=absent - when: aws_delete_tmp_file | bool diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 147e84131..ffdcd0ce1 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -148,6 +148,22 @@ - vovsbr when: "{{ openshift_remove_all | default(true) | bool }}" + - shell: atomic uninstall "{{ item }}"-master-api + changed_when: False + failed_when: False + with_items: + - openshift-enterprise + - atomic-enterprise + - origin + + - shell: atomic uninstall "{{ item }}"-master-controllers + changed_when: False + failed_when: False + with_items: + - openshift-enterprise + - atomic-enterprise + - origin + - shell: atomic uninstall "{{ item }}"-master changed_when: False failed_when: False diff --git a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml index 13e1da961..4ee6afe2a 100644 --- a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml +++ b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml @@ -22,16 +22,28 @@ hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config serial: 1 any_errors_fatal: true + + roles: + - lib_openshift + tasks: - - name: Prepare for Node draining - command: > - {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=false + - name: Mark node unschedulable + oadm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: False delegate_to: "{{ groups.oo_first_master.0 }}" - when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade + retries: 10 + delay: 5 + register: node_unschedulable + until: node_unschedulable|succeeded + when: + - l_docker_upgrade is defined + - l_docker_upgrade | bool + - inventory_hostname in groups.oo_nodes_to_upgrade - name: Drain Node for Kubelet upgrade command: > - {{ openshift.common.admin_binary }} drain {{ openshift.node.nodename }} --force --delete-local-data + {{ openshift.common.admin_binary }} drain {{ openshift.node.nodename }} --force --delete-local-data --ignore-daemonsets delegate_to: "{{ groups.oo_first_master.0 }}" when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade @@ -39,7 +51,12 @@ when: l_docker_upgrade is defined and l_docker_upgrade | bool - name: Set node schedulability - command: > - {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=true + oadm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: True delegate_to: "{{ groups.oo_first_master.0 }}" - when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade and openshift.node.schedulable | bool + retries: 10 + delay: 5 + register: node_schedulable + until: node_schedulable|succeeded + when: node_unschedulable|changed diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml b/playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml index d791e89f6..b61d9e58a 100644 --- a/playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml +++ b/playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml @@ -25,6 +25,9 @@ openshift_upgrade_min: "{{ '1.2' if deployment_type == 'origin' else '3.2' }}" # Pre-upgrade +- include: ../../../../common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml + tags: + - pre_upgrade - name: Update repos on control plane hosts hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_control_plane.yml b/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_control_plane.yml index d1c2bd17a..7ae1b3e6e 100644 --- a/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_control_plane.yml +++ b/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_control_plane.yml @@ -25,6 +25,9 @@ openshift_upgrade_min: "{{ '1.3' if deployment_type == 'origin' else '3.3' }}" # Pre-upgrade +- include: ../../../../common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml + tags: + - pre_upgrade - name: Update repos on control plane hosts hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_nodes.yml b/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_nodes.yml index f6e66c477..ec63ea60e 100644 --- a/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_nodes.yml +++ b/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade_nodes.yml @@ -47,7 +47,7 @@ tags: - pre_upgrade -- include: ../disable_excluder.yml +- include: ../../../../common/openshift-cluster/disable_excluder.yml tags: - pre_upgrade diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade.yml index e55ab1b16..69cabcd33 100644 --- a/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade.yml +++ b/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade.yml @@ -101,3 +101,5 @@ - include: ../../../../common/openshift-cluster/upgrades/upgrade_nodes.yml - include: ../../../../common/openshift-cluster/upgrades/post_control_plane.yml + +- include: ../../../../common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml b/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml index e18b4280c..719057d2b 100644 --- a/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml +++ b/playbooks/byo/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml @@ -25,6 +25,9 @@ openshift_upgrade_min: "{{ '1.4' if deployment_type == 'origin' else '3.4' }}" # Pre-upgrade +- include: ../../../../common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml + tags: + - pre_upgrade - name: Update repos on control plane hosts hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config @@ -104,3 +107,5 @@ - include: ../../../../common/openshift-cluster/upgrades/upgrade_control_plane.yml - include: ../../../../common/openshift-cluster/upgrades/post_control_plane.yml + +- include: ../../../../common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml diff --git a/playbooks/byo/openshift-preflight/check.yml b/playbooks/byo/openshift-preflight/check.yml index 32673d01d..c5f05d0f0 100644 --- a/playbooks/byo/openshift-preflight/check.yml +++ b/playbooks/byo/openshift-preflight/check.yml @@ -1,31 +1,12 @@ --- - hosts: OSEv3 - roles: - - openshift_preflight/init - -- hosts: OSEv3 - name: checks that apply to all hosts - gather_facts: no - ignore_errors: yes - roles: - - openshift_preflight/common - -- hosts: masters - name: checks that apply to masters - gather_facts: no - ignore_errors: yes - roles: - - openshift_preflight/masters - -- hosts: nodes - name: checks that apply to nodes - gather_facts: no - ignore_errors: yes - roles: - - openshift_preflight/nodes - -- hosts: OSEv3 - name: verify check results - gather_facts: no - roles: - - openshift_preflight/verify_status + name: run OpenShift health checks + roles: + - openshift_health_checker + post_tasks: + # NOTE: we need to use the old "action: name" syntax until + # https://github.com/ansible/ansible/issues/20513 is fixed. + - action: openshift_health_check + args: + checks: + - '@preflight' diff --git a/playbooks/certificate_expiry/default.yaml b/playbooks/certificate_expiry/default.yaml new file mode 100644 index 000000000..630135cae --- /dev/null +++ b/playbooks/certificate_expiry/default.yaml @@ -0,0 +1,10 @@ +--- +# Default behavior, you will need to ensure you run ansible with the +# -v option to see report results: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/certificate_expiry/easy-mode.yaml b/playbooks/certificate_expiry/easy-mode.yaml new file mode 100644 index 000000000..ae41c7c14 --- /dev/null +++ b/playbooks/certificate_expiry/easy-mode.yaml @@ -0,0 +1,18 @@ +--- +# This example playbook is great if you're just wanting to try the +# role out. +# +# This example enables HTML and JSON reports +# +# All certificates (healthy or not) are included in the results + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_show_all: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/certificate_expiry/html_and_json_default_paths.yaml b/playbooks/certificate_expiry/html_and_json_default_paths.yaml new file mode 100644 index 000000000..d80cb6ff4 --- /dev/null +++ b/playbooks/certificate_expiry/html_and_json_default_paths.yaml @@ -0,0 +1,12 @@ +--- +# Generate HTML and JSON artifacts in their default paths: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/certificate_expiry/longer-warning-period-json-results.yaml b/playbooks/certificate_expiry/longer-warning-period-json-results.yaml new file mode 100644 index 000000000..87a0f3be4 --- /dev/null +++ b/playbooks/certificate_expiry/longer-warning-period-json-results.yaml @@ -0,0 +1,13 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out) and save the results as a JSON file: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/certificate_expiry/longer_warning_period.yaml b/playbooks/certificate_expiry/longer_warning_period.yaml new file mode 100644 index 000000000..960457c4b --- /dev/null +++ b/playbooks/certificate_expiry/longer_warning_period.yaml @@ -0,0 +1,12 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out): + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/certificate_expiry/roles b/playbooks/certificate_expiry/roles new file mode 120000 index 000000000..b741aa3db --- /dev/null +++ b/playbooks/certificate_expiry/roles @@ -0,0 +1 @@ +../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/registry.yml b/playbooks/common/openshift-cluster/redeploy-certificates/registry.yml index 18b93e1d6..6771cc98d 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates/registry.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates/registry.yml @@ -2,6 +2,8 @@ - name: Update registry certificates hosts: oo_first_master vars: + roles: + - lib_openshift tasks: - name: Create temp directory for kubeconfig command: mktemp -d /tmp/openshift-ansible-XXXXXX @@ -46,12 +48,15 @@ # Replace dc/docker-registry certificate secret contents if set. - block: + - name: Load lib_openshift modules + include_role: + name: lib_openshift + - name: Retrieve registry service IP - command: > - {{ openshift.common.client_binary }} get service docker-registry - -o jsonpath='{.spec.clusterIP}' - --config={{ mktemp.stdout }}/admin.kubeconfig - -n default + oc_service: + namespace: default + name: docker-registry + state: list register: docker_registry_service_ip changed_when: false @@ -65,18 +70,22 @@ --signer-cert={{ openshift.common.config_base }}/master/ca.crt --signer-key={{ openshift.common.config_base }}/master/ca.key --signer-serial={{ openshift.common.config_base }}/master/ca.serial.txt - --hostnames="{{ docker_registry_service_ip.stdout }},docker-registry.default.svc.cluster.local,{{ docker_registry_route_hostname }}" + --hostnames="{{ docker_registry_service_ip.results.clusterip }},docker-registry.default.svc.cluster.local,{{ docker_registry_route_hostname }}" --cert={{ openshift.common.config_base }}/master/registry.crt --key={{ openshift.common.config_base }}/master/registry.key - name: Update registry certificates secret - shell: > - {{ openshift.common.client_binary }} secret new registry-certificates - {{ openshift.common.config_base }}/master/registry.crt - {{ openshift.common.config_base }}/master/registry.key - --config={{ mktemp.stdout }}/admin.kubeconfig - -n default - -o json | oc replace -f - + oc_secret: + kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" + name: registry-certificates + namespace: default + state: present + files: + - name: registry.crt + path: "{{ openshift.common.config_base }}/master/registry.crt" + - name: registry.key + path: "{{ openshift.common.config_base }}/master/registry.key" + run_once: true when: l_docker_registry_dc.rc == 0 and 'registry-certificates' in docker_registry_secrets and 'REGISTRY_HTTP_TLS_CERTIFICATE' in docker_registry_env_vars and 'REGISTRY_HTTP_TLS_KEY' in docker_registry_env_vars - name: Redeploy docker registry diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/router.yml b/playbooks/common/openshift-cluster/redeploy-certificates/router.yml index a9e9f0915..35eedd5ee 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates/router.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates/router.yml @@ -2,12 +2,13 @@ - name: Update router certificates hosts: oo_first_master vars: + roles: + - lib_openshift tasks: - name: Create temp directory for kubeconfig command: mktemp -d /tmp/openshift-ansible-XXXXXX register: mktemp changed_when: false - - name: Copy admin client config(s) command: > cp {{ openshift.common.config_base }}/master//admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig @@ -45,10 +46,12 @@ - block: - name: Delete existing router certificate secret - command: > - {{ openshift.common.client_binary }} delete secret/router-certs - --config={{ mktemp.stdout }}/admin.kubeconfig - -n default + oc_secret: + kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" + name: router-certs + namespace: default + state: absent + run_once: true - name: Remove router service annotations command: > diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index 45aabf3e4..7ef79afa9 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -29,12 +29,18 @@ - name: Check available disk space for etcd backup shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 register: avail_disk + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false # TODO: replace shell module with command and update later checks - name: Check current embedded etcd disk usage shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 register: etcd_disk_usage when: embedded_etcd | bool + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false - name: Abort if insufficient disk space for etcd backup fail: diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml index 690858c53..a9b5b94e6 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml @@ -9,21 +9,36 @@ register: etcd_rpm_version failed_when: false when: not openshift.common.is_containerized | bool + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + - name: Record containerized etcd version command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* register: etcd_container_version failed_when: false when: openshift.common.is_containerized | bool + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + - name: Record containerized etcd version command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* register: etcd_container_version failed_when: false when: openshift.common.is_containerized | bool and not openshift.common.is_etcd_system_container | bool + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + - name: Record containerized etcd version command: runc exec etcd_container rpm -qa --qf '%{version}' etcd\* register: etcd_container_version failed_when: false when: openshift.common.is_containerized | bool and openshift.common.is_etcd_system_container | bool + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false # I really dislike this copy/pasta but I wasn't able to find a way to get it to loop # through hosts, then loop through tasks only when appropriate diff --git a/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml index 37c89374c..046535680 100644 --- a/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml @@ -1,20 +1,17 @@ --- - name: Filter list of nodes to be upgraded if necessary hosts: oo_first_master + + roles: + - lib_openshift + tasks: - name: Retrieve list of openshift nodes matching upgrade label - command: > - {{ openshift.common.client_binary }} - get nodes - --config={{ openshift.common.config_base }}/master/admin.kubeconfig - --selector={{ openshift_upgrade_nodes_label }} - -o jsonpath='{.items[*].metadata.name}' - register: matching_nodes - changed_when: false - when: openshift_upgrade_nodes_label is defined - - - set_fact: - nodes_to_upgrade: "{{ matching_nodes.stdout.split(' ') }}" + oc_obj: + state: list + kind: node + selector: "{{ openshift_upgrade_nodes_label }}" + register: nodes_to_upgrade when: openshift_upgrade_nodes_label is defined # We got a list of nodes with the label, now we need to match these with inventory hosts @@ -26,7 +23,9 @@ ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" ansible_become: "{{ g_sudo | default(omit) }}" with_items: " {{ groups['oo_nodes_to_config'] }}" - when: openshift_upgrade_nodes_label is defined and hostvars[item].openshift.common.hostname in nodes_to_upgrade + when: + - openshift_upgrade_nodes_label is defined + - hostvars[item].openshift.common.hostname in nodes_to_upgrade.results.results[0]['items'] | map(attribute='metadata.name') | list changed_when: false # Build up the oo_nodes_to_upgrade group, use the list filtered by label if diff --git a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml index 4135f7e94..01c1e0c15 100644 --- a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml @@ -9,77 +9,92 @@ registry_image: "{{ openshift.master.registry_url | replace( '${component}', 'docker-registry' ) | replace ( '${version}', openshift_image_tag ) }}" router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', openshift_image_tag ) }}" oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig" - roles: - - openshift_manageiq - # Create the new templates shipped in 3.2, existing templates are left - # unmodified. This prevents the subsequent role definition for - # openshift_examples from failing when trying to replace templates that do - # not already exist. We could have potentially done a replace --force to - # create and update in one step. - - openshift_examples - - openshift_hosted_templates - # Update the existing templates - - role: openshift_examples - registry_url: "{{ openshift.master.registry_url }}" - openshift_examples_import_command: replace - - role: openshift_hosted_templates - registry_url: "{{ openshift.master.registry_url }}" - openshift_hosted_templates_import_command: replace - pre_tasks: - # TODO: remove temp_skip_router_registry_upgrade variable. This is a short term hack - # to allow ops to use this control plane upgrade, without triggering router/registry - # upgrade which has not yet been synced with their process. + pre_tasks: + - name: Load lib_openshift modules + include_role: + name: lib_openshift - name: Collect all routers - command: > - {{ oc_cmd }} get pods --all-namespaces -l 'router' -o json + oc_obj: + state: list + kind: pods + all_namespaces: True + selector: 'router' register: all_routers - failed_when: false - changed_when: false - when: temp_skip_router_registry_upgrade is not defined - - set_fact: haproxy_routers="{{ (all_routers.stdout | from_json)['items'] | oo_pods_match_component(openshift_deployment_type, 'haproxy-router') | oo_select_keys_from_list(['metadata']) }}" - when: all_routers.rc == 0 and temp_skip_router_registry_upgrade is not defined + - set_fact: haproxy_routers="{{ (all_routers.reults.results[0]['items'] | oo_pods_match_component(openshift_deployment_type, 'haproxy-router') | oo_select_keys_from_list(['metadata']) }}" + when: + - all_routers.results.returncode == 0 - set_fact: haproxy_routers=[] - when: all_routers.rc != 0 and temp_skip_router_registry_upgrade is not defined + when: + - all_routers.results.returncode != 0 - name: Update router image to current version - when: all_routers.rc == 0 and temp_skip_router_registry_upgrade is not defined + when: + - all_routers.results.returncode == 0 command: > {{ oc_cmd }} patch dc/{{ item['labels']['deploymentconfig'] }} -n {{ item['namespace'] }} -p '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}","livenessProbe":{"tcpSocket":null,"httpGet":{"path": "/healthz", "port": 1936, "host": "localhost", "scheme": "HTTP"},"initialDelaySeconds":10,"timeoutSeconds":1}}]}}}}' --api-version=v1 with_items: "{{ haproxy_routers }}" + # AUDIT:changed_when_note: `false` not being set here. What we + # need to do is check the current router image version and see if + # this task needs to be ran. - name: Check for default registry - command: > - {{ oc_cmd }} get -n default dc/docker-registry + oc_obj: + state: list + kind: dc + name: docker-registry register: _default_registry - failed_when: false - changed_when: false - when: temp_skip_router_registry_upgrade is not defined - name: Update registry image to current version - when: _default_registry.rc == 0 and temp_skip_router_registry_upgrade is not defined + when: + - _default_registry.results.results[0] != {} command: > {{ oc_cmd }} patch dc/docker-registry -n default -p '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}' --api-version=v1 + # AUDIT:changed_when_note: `false` not being set here. What we + # need to do is check the current registry image version and see + # if this task needs to be ran. + + roles: + - openshift_manageiq + # Create the new templates shipped in 3.2, existing templates are left + # unmodified. This prevents the subsequent role definition for + # openshift_examples from failing when trying to replace templates that do + # not already exist. We could have potentially done a replace --force to + # create and update in one step. + - openshift_examples + - openshift_hosted_templates + # Update the existing templates + - role: openshift_examples + registry_url: "{{ openshift.master.registry_url }}" + openshift_examples_import_command: replace + - role: openshift_hosted_templates + registry_url: "{{ openshift.master.registry_url }}" + openshift_hosted_templates_import_command: replace # Check for warnings to be printed at the end of the upgrade: - name: Check for warnings hosts: oo_masters_to_config tasks: # Check if any masters are using pluginOrderOverride and warn if so, only for 1.3/3.3 and beyond: - - command: > - grep pluginOrderOverride {{ openshift.common.config_base }}/master/master-config.yaml + - name: grep pluginOrderOverride + command: grep pluginOrderOverride {{ openshift.common.config_base }}/master/master-config.yaml register: grep_plugin_order_override when: openshift.common.version_gte_3_3_or_1_3 | bool + changed_when: false failed_when: false + - name: Warn if pluginOrderOverride is in use in master-config.yaml - debug: msg="WARNING pluginOrderOverride is being deprecated in master-config.yaml, please see https://docs.openshift.com/enterprise/latest/architecture/additional_concepts/admission_controllers.html for more information." - when: not grep_plugin_order_override | skipped and grep_plugin_order_override.rc == 0 + debug: + msg: "WARNING pluginOrderOverride is being deprecated in master-config.yaml, please see https://docs.openshift.com/enterprise/latest/architecture/additional_concepts/admission_controllers.html for more information." + when: + - not grep_plugin_order_override | skipped + - grep_plugin_order_override.rc == 0 - include: ../reset_excluder.yml tags: diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index db2c27919..fd01a6625 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -238,47 +238,42 @@ any_errors_fatal: true pre_tasks: + - name: Load lib_openshift modules + include_role: + name: lib_openshift + # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node # or docker actually needs an upgrade before proceeding. Perhaps best to save this until # we merge upgrade functionality into the base roles and a normal config.yml playbook run. - - name: Determine if node is currently scheduleable - command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} -o json - register: node_output - delegate_to: "{{ groups.oo_first_master.0 }}" - changed_when: false - - - set_fact: - was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}" - - name: Mark node unschedulable - command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false + oadm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: False delegate_to: "{{ groups.oo_first_master.0 }}" - # NOTE: There is a transient "object has been modified" error here, allow a couple - # retries for a more reliable upgrade. - register: node_unsched - until: node_unsched.rc == 0 - retries: 3 - delay: 1 + retries: 10 + delay: 5 + register: node_unschedulable + until: node_unschedulable|succeeded - name: Drain Node for Kubelet upgrade command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data + {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data --ignore-daemonsets delegate_to: "{{ groups.oo_first_master.0 }}" roles: + - lib_openshift - openshift_facts - docker - openshift_node_upgrade post_tasks: - name: Set node schedulability - command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true + oadm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: True delegate_to: "{{ groups.oo_first_master.0 }}" - when: was_schedulable | bool - register: node_sched - until: node_sched.rc == 0 - retries: 3 - delay: 1 + retries: 10 + delay: 5 + register: node_schedulable + until: node_schedulable|succeeded + when: node_unschedulable|changed diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index e45b635f7..4e1838c71 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -7,50 +7,45 @@ any_errors_fatal: true pre_tasks: + - name: Load lib_openshift modules + include_role: + name: lib_openshift + # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node # or docker actually needs an upgrade before proceeding. Perhaps best to save this until # we merge upgrade functionality into the base roles and a normal config.yml playbook run. - - name: Determine if node is currently scheduleable - command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} -o json - register: node_output - delegate_to: "{{ groups.oo_first_master.0 }}" - changed_when: false - - - set_fact: - was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}" - - name: Mark node unschedulable - command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false + oadm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: False delegate_to: "{{ groups.oo_first_master.0 }}" - # NOTE: There is a transient "object has been modified" error here, allow a couple - # retries for a more reliable upgrade. - register: node_unsched - until: node_unsched.rc == 0 - retries: 3 - delay: 1 + retries: 10 + delay: 5 + register: node_unschedulable + until: node_unschedulable|succeeded - name: Drain Node for Kubelet upgrade command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data + {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data --ignore-daemonsets delegate_to: "{{ groups.oo_first_master.0 }}" roles: + - lib_openshift - openshift_facts - docker - openshift_node_upgrade post_tasks: - name: Set node schedulability - command: > - {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true + oadm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: True delegate_to: "{{ groups.oo_first_master.0 }}" - when: was_schedulable | bool - register: node_sched - until: node_sched.rc == 0 - retries: 3 - delay: 1 + retries: 10 + delay: 5 + register: node_schedulable + until: node_schedulable|succeeded + when: node_unschedulable|changed - include: ../reset_excluder.yml tags: diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/filter_plugins b/playbooks/common/openshift-cluster/upgrades/v3_5/filter_plugins new file mode 120000 index 000000000..7de3c1dd7 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/filter_plugins @@ -0,0 +1 @@ +../../../../../filter_plugins/
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/roles b/playbooks/common/openshift-cluster/upgrades/v3_5/roles new file mode 120000 index 000000000..415645be6 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/roles @@ -0,0 +1 @@ +../../../../../roles/
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml new file mode 100644 index 000000000..48c69eccd --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml @@ -0,0 +1,18 @@ +--- +############################################################################### +# Post upgrade - Upgrade job storage +############################################################################### +- name: Upgrade job storage + hosts: oo_first_master + roles: + - { role: openshift_cli } + vars: + # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe + # restart. + skip_docker_role: True + tasks: + - name: Upgrade job storage + command: > + {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig + migrate storage --include=jobs --confirm + run_once: true diff --git a/playbooks/common/openshift-master/restart_hosts.yml b/playbooks/common/openshift-master/restart_hosts.yml index a9750e40f..67ba0aa2e 100644 --- a/playbooks/common/openshift-master/restart_hosts.yml +++ b/playbooks/common/openshift-master/restart_hosts.yml @@ -7,14 +7,26 @@ ignore_errors: true become: yes +# WARNING: This process is riddled with weird behavior. + +# Workaround for https://github.com/ansible/ansible/issues/21269 +- set_fact: + wait_for_host: "{{ ansible_host }}" + +# Ansible's blog documents this *without* the port, which appears to now +# just wait until the timeout value and then proceed without checking anything. +# port is now required. +# +# However neither ansible_ssh_port or ansible_port are reliably defined, likely +# only if overridden. Assume a default of 22. - name: Wait for master to restart local_action: module: wait_for - host="{{ ansible_host }}" + host="{{ wait_for_host }}" state=started delay=10 timeout=600 - port="{{ ansible_ssh_port }}" + port="{{ ansible_port | default(ansible_ssh_port | default(22,boolean=True),boolean=True) }}" become: no # Now that ssh is back up we can wait for API on the remote system, |