From 1a868e61fbab8f1e2095c0952031656c47926220 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Fri, 19 May 2017 16:01:03 -0400 Subject: Tolerate failures in the node upgrade playbook --- inventory/byo/hosts.origin.example | 25 +++++++++++++++++++++++++ inventory/byo/hosts.ose.example | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'inventory') diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 20f342023..310b8ab44 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -788,6 +788,31 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster. +# +# The percentage must exceed the value, this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# where as this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index f75a47bb8..e126bbcab 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -785,6 +785,31 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster. +# +# The percentage must exceed the value, this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# where as this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com -- cgit v1.2.3