| field | value | date |
|---|---|---|
| author | Scott Dodson <sdodson@redhat.com> | 2017-08-10 09:25:36 -0400 |
| committer | Scott Dodson <sdodson@redhat.com> | 2018-01-10 11:01:11 -0500 |
| commit | 0841917f05cfad2701164edbb271167c277d3300 | |
| tree | 97ea4e657da1a3513d0ffe5b3f5518521af466d9 | |
| parent | 31d19c5e68e61d004a93db738772c120e7dd0eb5 | |
Add the ability to specify a timeout for node drain operations
5 files changed, 37 insertions, 11 deletions
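
In practice, the new `openshift_upgrade_nodes_drain_timeout` inventory variable is appended to the drain command as `--timeout=<seconds>s`, run from the first master for each node being upgraded. The default of `0` keeps the previous behaviour of waiting indefinitely (with a single retry), while any non-zero value lets the upgrade continue even when a node has not finished draining in time. A minimal inventory sketch, assuming a ten-minute limit is acceptable (the value is purely illustrative):

```
# typically set in the [OSEv3:vars] section of the inventory
# Allow each node up to 600 seconds to drain its pods during an upgrade;
# if the drain has not completed by then, the upgrade proceeds anyway.
openshift_upgrade_nodes_drain_timeout=600
```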
diff --git a/inventory/hosts.example b/inventory/hosts.example
index bc85d1020..b07e0d159 100644
--- a/inventory/hosts.example
+++ b/inventory/hosts.example
@@ -991,6 +991,14 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
 # where as this would not
 # openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50
 #
+# A timeout to wait for nodes to drain pods can be specified to ensure that the
+# upgrade continues even if nodes fail to drain pods in the allowed time. The
+# default value of 0 will wait indefinitely allowing the admin to investigate
+# the root cause and ensuring that disruption budgets are respected. If the
+# a timeout of 0 is used there will also be one attempt to re-try draining the
+# node. If a non zero timeout is specified there will be no attempt to retry.
+#openshift_upgrade_nodes_drain_timeout=0
+#
 # Multiple data migrations take place and if they fail they will fail the upgrade
 # You may wish to disable these or make them non fatal
 #
diff --git a/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml
index ffb11670d..8392e21ee 100644
--- a/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml
@@ -51,13 +51,19 @@
   - name: Drain Node for Kubelet upgrade
     command: >
-      {{ openshift_client_binary }} adm drain {{ openshift.node.nodename }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+      {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+      --force --delete-local-data --ignore-daemonsets
+      --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
     register: l_docker_upgrade_drain_result
     until: not (l_docker_upgrade_drain_result is failed)
-    retries: 60
-    delay: 60
+    retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
+    delay: 5
+    failed_when:
+    - l_docker_upgrade_drain_result is failed
+    - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0

   - include_tasks: tasks/upgrade.yml
     when: l_docker_upgrade is defined and l_docker_upgrade | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
index 91d496ff4..3f2ba8969 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
@@ -291,12 +291,18 @@
   - name: Drain Node for Kubelet upgrade
     command: >
-      {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+      {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+      --force --delete-local-data --ignore-daemonsets
+      --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
     delegate_to: "{{ groups.oo_first_master.0 }}"
     register: l_upgrade_control_plane_drain_result
     until: not (l_upgrade_control_plane_drain_result is failed)
-    retries: 60
-    delay: 60
+    retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
+    delay: 5
+    failed_when:
+    - l_upgrade_control_plane_drain_result is failed
+    - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0

   roles:
   - openshift_facts
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index aba179c2b..856c8328c 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -35,12 +35,18 @@
   - name: Drain Node for Kubelet upgrade
     command: >
-      {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+      {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+      --force --delete-local-data --ignore-daemonsets
+      --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
     delegate_to: "{{ groups.oo_first_master.0 }}"
     register: l_upgrade_nodes_drain_result
     until: not (l_upgrade_nodes_drain_result is failed)
-    retries: 60
-    delay: 60
+    retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
+    delay: 5
+    failed_when:
+    - l_upgrade_nodes_drain_result is failed
+    - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0

   post_tasks:
   - import_role:
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml
index 6d59bfd0b..e259b5d09 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml
@@ -50,11 +50,11 @@
     delegate_to: "{{ groups.oo_first_master.0 }}"
     register: l_upgrade_nodes_drain_result
     until: not (l_upgrade_nodes_drain_result is failed)
-    retries: "{{ 1 if openshift_upgrade_nodes_drain_timeout | default(0) == '0' else 0  | int }}"
+    retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
    delay: 5
     failed_when:
     - l_upgrade_nodes_drain_result is failed
-    - openshift_upgrade_nodes_drain_timeout | default(0) == '0'
+    - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0

 # Alright, let's clean up!
 - name: clean up the old scale group
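
For reference, with the templated tasks above and assuming a 600-second timeout, an `oc` client binary, and the usual `/etc/origin` config base (node name and values illustrative), the drain executed on the first master would render roughly as:

```
oc adm drain node-1.example.com --config=/etc/origin/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets --timeout=600s
```

Because the `failed_when` conditions only treat a failed drain as fatal when the timeout is `0`, a drain that exceeds a non-zero limit is reported but does not abort the upgrade, matching the behaviour described in the inventory comment.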
