summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml8
-rw-r--r--playbooks/common/openshift-cluster/redeploy-certificates.yml8
-rw-r--r--playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml6
-rw-r--r--roles/openshift_certificate_expiry/README.md20
-rw-r--r--roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py4
-rw-r--r--roles/openshift_certificate_expiry/library/openshift_cert_expiry.py45
-rw-r--r--roles/openshift_node/README.md2
7 files changed, 72 insertions, 21 deletions
diff --git a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
index 0d451cf77..1e0a6d4e7 100644
--- a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
+++ b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
@@ -18,20 +18,20 @@
# If a node fails, halt everything, the admin will need to clean up and we
# don't want to carry on, potentially taking out every node. The playbook can safely be re-run
# and will not take any action on a node already running the requested docker version.
-- name: Evacuate and upgrade nodes
+- name: Drain and upgrade nodes
hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
serial: 1
any_errors_fatal: true
tasks:
- - name: Prepare for Node evacuation
+ - name: Prepare for Node draining
command: >
{{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=false
delegate_to: "{{ groups.oo_first_master.0 }}"
when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
- - name: Evacuate Node for Kubelet upgrade
+ - name: Drain Node for Kubelet upgrade
command: >
- {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --evacuate --force
+ {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --drain --force
delegate_to: "{{ groups.oo_first_master.0 }}"
when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates.yml b/playbooks/common/openshift-cluster/redeploy-certificates.yml
index 5f008a045..5fc81bf3a 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates.yml
@@ -204,7 +204,7 @@
cp {{ openshift.common.config_base }}/master//admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig
changed_when: False
-- name: Serially evacuate all nodes to trigger redeployments
+- name: Serially drain all nodes to trigger redeployments
hosts: oo_nodes_to_config
serial: 1
any_errors_fatal: true
@@ -222,7 +222,7 @@
was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
when: openshift_certificates_redeploy_ca | default(false) | bool
- - name: Prepare for node evacuation
+ - name: Prepare for node draining
command: >
{{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
manage-node {{ openshift.node.nodename }}
@@ -230,11 +230,11 @@
delegate_to: "{{ groups.oo_first_master.0 }}"
when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool
- - name: Evacuate node
+ - name: Drain node
command: >
{{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
manage-node {{ openshift.node.nodename }}
- --evacuate --force
+ --drain --force
delegate_to: "{{ groups.oo_first_master.0 }}"
when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index cefc7d12b..68b111df4 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -1,5 +1,5 @@
---
-- name: Evacuate and upgrade nodes
+- name: Drain and upgrade nodes
hosts: oo_nodes_to_upgrade
# This var must be set with -e on invocation, as it is not a per-host inventory var
# and is evaluated early. Values such as "20%" can also be used.
@@ -39,9 +39,9 @@
retries: 3
delay: 1
- - name: Evacuate Node for Kubelet upgrade
+ - name: Drain Node for Kubelet upgrade
command: >
- {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --evacuate --force
+ {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --drain --force
delegate_to: "{{ groups.oo_first_master.0 }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade
diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md
index d44438332..a88470bdd 100644
--- a/roles/openshift_certificate_expiry/README.md
+++ b/roles/openshift_certificate_expiry/README.md
@@ -9,7 +9,7 @@ include:
* Master/Node Service Certificates
* Router/Registry Service Certificates from etcd secrets
* Master/Node/Router/Registry/Admin `kubeconfig`s
-* Etcd certificates
+* Etcd certificates (including embedded)
This role pairs well with the redeploy certificates playbook:
@@ -111,12 +111,16 @@ There are two top-level keys in the saved JSON results, `data` and
`summary`.
The `data` key is a hash where the keys are the names of each host
-examined and the values are the check results for each respective
-host.
+examined and the values are the check results for the certificates
+identified on each respective host.
-The `summary` key is a hash that summarizes the number of certificates
-expiring within the configured warning window and the number of
-already expired certificates.
+The `summary` key is a hash that summarizes the total number of
+certificates:
+
+* examined on the entire cluster
+* OK
+* expiring within the configured warning window
+* already expired
The example below is abbreviated to save space:
@@ -193,7 +197,9 @@ The example below is abbreviated to save space:
},
"summary": {
"warning": 6,
- "expired": 0
+ "expired": 0,
+ "total": 7,
+ "ok": 1
}
}
```
diff --git a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
index bedd23fe8..5f102e960 100644
--- a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
+++ b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
@@ -51,9 +51,13 @@ Example playbook usage:
total_warnings = sum([hostvars[h]['check_results']['summary']['warning'] for h in play_hosts])
total_expired = sum([hostvars[h]['check_results']['summary']['expired'] for h in play_hosts])
+ total_ok = sum([hostvars[h]['check_results']['summary']['ok'] for h in play_hosts])
+ total_total = sum([hostvars[h]['check_results']['summary']['total'] for h in play_hosts])
json_result['summary']['warning'] = total_warnings
json_result['summary']['expired'] = total_expired
+ json_result['summary']['ok'] = total_ok
+ json_result['summary']['total'] = total_total
return json_result
diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
index e838eb2d4..1fac284f2 100644
--- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
+++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
@@ -467,7 +467,11 @@ an OpenShift Container Platform cluster
######################################################################
# Check etcd certs
+ #
+ # Two things to check: 'external' etcd, and embedded etcd.
######################################################################
+ # FIRST: The 'external' etcd
+ #
# Some values may be duplicated, make this a set for now so we
# unique them all
etcd_certs_to_check = set([])
@@ -506,6 +510,43 @@ an OpenShift Container Platform cluster
classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
######################################################################
+ # Now the embedded etcd
+ ######################################################################
+ try:
+ with open('/etc/origin/master/master-config.yaml', 'r') as fp:
+ cfg = yaml.load(fp)
+ except IOError:
+ # Not present
+ pass
+ else:
+ if cfg.get('etcdConfig', {}).get('servingInfo', {}).get('certFile', None) is not None:
+ # This is embedded
+ etcd_crt_name = cfg['etcdConfig']['servingInfo']['certFile']
+ else:
+ # Not embedded
+ etcd_crt_name = None
+
+ if etcd_crt_name is not None:
+ # etcd_crt_name is relative to the location of the
+ # master-config.yaml file
+ cfg_path = os.path.dirname(fp.name)
+ etcd_cert = os.path.join(cfg_path, etcd_crt_name)
+ with open(etcd_cert, 'r') as etcd_fp:
+ (cert_subject,
+ cert_expiry_date,
+ time_remaining) = load_and_handle_cert(etcd_fp.read(), now)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': etcd_fp.name,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
+
+ ######################################################################
# /Check etcd certs
######################################################################
@@ -523,7 +564,7 @@ an OpenShift Container Platform cluster
######################################################################
# First the router certs
try:
- router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(),
+ router_secrets_raw = subprocess.Popen('oc get -n default secret router-certs -o yaml'.split(),
stdout=subprocess.PIPE)
router_ds = yaml.load(router_secrets_raw.communicate()[0])
router_c = router_ds['data']['tls.crt']
@@ -552,7 +593,7 @@ an OpenShift Container Platform cluster
######################################################################
# Now for registry
try:
- registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(),
+ registry_secrets_raw = subprocess.Popen('oc get -n default secret registry-certificates -o yaml'.split(),
stdout=subprocess.PIPE)
registry_ds = yaml.load(registry_secrets_raw.communicate()[0])
registry_c = registry_ds['data']['registry.crt']
diff --git a/roles/openshift_node/README.md b/roles/openshift_node/README.md
index d1920c485..616f44c1d 100644
--- a/roles/openshift_node/README.md
+++ b/roles/openshift_node/README.md
@@ -43,7 +43,7 @@ Currently we support re-labeling nodes but we don't re-schedule running pods nor
```
oadm manage-node --schedulable=false ${NODE}
-oadm manage-node --evacuate ${NODE}
+oadm manage-node --drain ${NODE}
oadm manage-node --schedulable=true ${NODE}
````