summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/docker/defaults/main.yml4
-rw-r--r--roles/docker/tasks/package_docker.yml2
-rw-r--r--roles/docker/tasks/systemcontainer_crio.yml8
-rw-r--r--roles/etcd_common/tasks/backup.yml2
-rwxr-xr-xroles/lib_openshift/src/test/integration/oc_configmap.yml4
-rwxr-xr-xroles/lib_openshift/src/test/unit/test_oc_configmap.py6
-rw-r--r--roles/openshift_examples/README.md14
-rwxr-xr-xroles/openshift_facts/library/openshift_facts.py39
-rw-r--r--roles/openshift_gcp/tasks/main.yaml43
-rw-r--r--roles/openshift_gcp/templates/dns.j2.sh13
-rw-r--r--roles/openshift_gcp/templates/provision.j2.sh318
-rw-r--r--roles/openshift_gcp/templates/remove.j2.sh156
-rw-r--r--roles/openshift_gcp_image_prep/files/partition.conf3
-rw-r--r--roles/openshift_gcp_image_prep/tasks/main.yaml18
-rw-r--r--roles/openshift_health_checker/action_plugins/openshift_health_check.py158
-rw-r--r--roles/openshift_health_checker/openshift_checks/__init__.py117
-rw-r--r--roles/openshift_health_checker/openshift_checks/disk_availability.py9
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py4
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py8
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging.py4
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/mixins.py1
-rw-r--r--roles/openshift_health_checker/test/action_plugin_test.py125
-rw-r--r--roles/openshift_health_checker/test/disk_availability_test.py13
-rw-r--r--roles/openshift_health_checker/test/docker_image_availability_test.py35
-rw-r--r--roles/openshift_health_checker/test/elasticsearch_test.py18
-rw-r--r--roles/openshift_health_checker/test/logging_index_time_test.py8
-rw-r--r--roles/openshift_health_checker/test/openshift_check_test.py43
-rw-r--r--roles/openshift_health_checker/test/ovs_version_test.py2
-rw-r--r--roles/openshift_health_checker/test/package_availability_test.py2
-rw-r--r--roles/openshift_health_checker/test/package_update_test.py2
-rw-r--r--roles/openshift_hosted/tasks/registry/secure.yml2
-rw-r--r--roles/openshift_logging/tasks/install_logging.yaml2
-rw-r--r--roles/openshift_master/defaults/main.yml4
-rw-r--r--roles/openshift_master/tasks/main.yml28
-rw-r--r--roles/openshift_master/vars/main.yml19
-rw-r--r--roles/openshift_master_facts/filter_plugins/openshift_master.py25
-rw-r--r--roles/openshift_metrics/README.md2
-rw-r--r--roles/openshift_metrics/defaults/main.yaml1
-rw-r--r--roles/openshift_metrics/tasks/install_hawkular.yaml1
-rw-r--r--roles/openshift_metrics/templates/route.j23
-rw-r--r--roles/openshift_node/defaults/main.yml4
-rw-r--r--roles/openshift_node/tasks/main.yml7
-rw-r--r--roles/openshift_node/tasks/node_system_container.yml2
-rw-r--r--roles/openshift_prometheus/defaults/main.yaml2
-rw-r--r--roles/openshift_prometheus/tasks/install_prometheus.yaml5
-rw-r--r--roles/openshift_repos/README.md10
-rw-r--r--roles/openshift_sanitize_inventory/vars/main.yml7
-rw-r--r--roles/openshift_service_catalog/tasks/install.yml2
-rw-r--r--roles/openshift_service_catalog/tasks/wire_aggregator.yml5
-rw-r--r--roles/openshift_version/defaults/main.yml1
-rw-r--r--roles/openshift_version/tasks/main.yml6
-rw-r--r--roles/rhel_subscribe/tasks/enterprise.yml11
-rw-r--r--roles/rhel_subscribe/tasks/main.yml10
54 files changed, 1145 insertions, 195 deletions
diff --git a/roles/docker/defaults/main.yml b/roles/docker/defaults/main.yml
index 7e206ded1..81f3ee9e4 100644
--- a/roles/docker/defaults/main.yml
+++ b/roles/docker/defaults/main.yml
@@ -1,6 +1,6 @@
---
docker_cli_auth_config_path: '/root/.docker'
-oreg_url: ''
-oreg_host: "{{ oreg_url.split('/')[0] if '.' in oreg_url.split('/')[0] else '' }}"
+# oreg_url is defined by user input.
+oreg_host: "{{ oreg_url.split('/')[0] if (oreg_url is defined and '.' in oreg_url.split('/')[0]) else '' }}"
oreg_auth_credentials_replace: False
diff --git a/roles/docker/tasks/package_docker.yml b/roles/docker/tasks/package_docker.yml
index d685d77f2..16aea5067 100644
--- a/roles/docker/tasks/package_docker.yml
+++ b/roles/docker/tasks/package_docker.yml
@@ -4,7 +4,7 @@
when: not openshift.common.is_atomic | bool
register: curr_docker_version
retries: 4
- until: not curr_docker_version | failed
+ until: curr_docker_version | succeeded
changed_when: false
- name: Error out if Docker pre-installed but too old
diff --git a/roles/docker/tasks/systemcontainer_crio.yml b/roles/docker/tasks/systemcontainer_crio.yml
index 0bab0899c..8208fa68d 100644
--- a/roles/docker/tasks/systemcontainer_crio.yml
+++ b/roles/docker/tasks/systemcontainer_crio.yml
@@ -134,6 +134,14 @@
image: "{{ l_crio_image }}"
state: latest
+- name: Remove CRI-o default configuration files
+ file:
+ path: "{{ item }}"
+ state: absent
+ with_items:
+ - /etc/cni/net.d/200-loopback.conf
+ - /etc/cni/net.d/100-crio-bridge.conf
+
- name: Create the CRI-O configuration
template:
dest: /etc/crio/crio.conf
diff --git a/roles/etcd_common/tasks/backup.yml b/roles/etcd_common/tasks/backup.yml
index c1580640f..42d27c081 100644
--- a/roles/etcd_common/tasks/backup.yml
+++ b/roles/etcd_common/tasks/backup.yml
@@ -36,7 +36,7 @@
- name: Abort if insufficient disk space for etcd backup
fail:
msg: >
- {{ l_etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
+ {{ l_etcd_disk_usage.stdout|int*2 }} Kb disk space required for etcd backup,
{{ l_avail_disk.stdout }} Kb available.
when: l_etcd_disk_usage.stdout|int*2 > l_avail_disk.stdout|int
diff --git a/roles/lib_openshift/src/test/integration/oc_configmap.yml b/roles/lib_openshift/src/test/integration/oc_configmap.yml
index c0d200e73..6a452ccec 100755
--- a/roles/lib_openshift/src/test/integration/oc_configmap.yml
+++ b/roles/lib_openshift/src/test/integration/oc_configmap.yml
@@ -55,7 +55,7 @@
config: "{{ filename }}"
from_literal:
foo: notbar
- deployment_type: online
+ deployment_type: openshift-enterprise
- name: fetch the updated configmap
oc_configmap:
@@ -70,7 +70,7 @@
assert:
that:
- cmout.results.results[0].metadata.name == 'configmaptest'
- - cmout.results.results[0].data.deployment_type == 'online'
+ - cmout.results.results[0].data.deployment_type == 'openshift-enterprise'
- cmout.results.results[0].data.foo == 'notbar'
###### end update test ###########
diff --git a/roles/lib_openshift/src/test/unit/test_oc_configmap.py b/roles/lib_openshift/src/test/unit/test_oc_configmap.py
index 318fd6167..27042c64b 100755
--- a/roles/lib_openshift/src/test/unit/test_oc_configmap.py
+++ b/roles/lib_openshift/src/test/unit/test_oc_configmap.py
@@ -79,7 +79,7 @@ class OCConfigMapTest(unittest.TestCase):
''' Testing a configmap create '''
params = copy.deepcopy(OCConfigMapTest.params)
params['from_file'] = {'test': '/root/file'}
- params['from_literal'] = {'foo': 'bar', 'deployment_type': 'online'}
+ params['from_literal'] = {'foo': 'bar', 'deployment_type': 'openshift-enterprise'}
configmap = '''{
"apiVersion": "v1",
@@ -100,7 +100,7 @@ class OCConfigMapTest(unittest.TestCase):
"apiVersion": "v1",
"data": {
"foo": "bar",
- "deployment_type": "online",
+ "deployment_type": "openshift-enterprise",
"test": "this is a file\\n"
},
"kind": "ConfigMap",
@@ -128,7 +128,7 @@ class OCConfigMapTest(unittest.TestCase):
self.assertTrue(results['changed'])
self.assertEqual(results['results']['results'][0]['metadata']['name'], 'configmap')
- self.assertEqual(results['results']['results'][0]['data']['deployment_type'], 'online')
+ self.assertEqual(results['results']['results'][0]['data']['deployment_type'], 'openshift-enterprise')
@unittest.skipIf(six.PY3, 'py2 test only')
@mock.patch('os.path.exists')
diff --git a/roles/openshift_examples/README.md b/roles/openshift_examples/README.md
index 8cc479c73..014cef111 100644
--- a/roles/openshift_examples/README.md
+++ b/roles/openshift_examples/README.md
@@ -21,13 +21,13 @@ Facts
Role Variables
--------------
-| Name | Default value | |
-|-------------------------------------|-----------------------------------------------------|------------------------------------------|
-| openshift_examples_load_centos | true when openshift_deployment_typenot 'enterprise' | Load centos image streams |
-| openshift_examples_load_rhel | true if openshift_deployment_type is 'enterprise' | Load rhel image streams |
-| openshift_examples_load_db_templates| true | Loads database templates |
-| openshift_examples_load_quickstarts | true | Loads quickstarts ie: nodejs, rails, etc |
-| openshift_examples_load_xpaas | false | Loads xpass streams and templates |
+| Name | Default value | |
+|-------------------------------------|----------------------------------------------------------------|------------------------------------------|
+| openshift_examples_load_centos | true when openshift_deployment_type not 'openshift-enterprise' | Load centos image streams |
+| openshift_examples_load_rhel | true if openshift_deployment_type is 'openshift-enterprise' | Load rhel image streams |
+| openshift_examples_load_db_templates| true | Loads database templates |
+| openshift_examples_load_quickstarts | true | Loads quickstarts ie: nodejs, rails, etc |
+| openshift_examples_load_xpaas | false | Loads xpass streams and templates |
Dependencies
diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py
index 517e0231d..a76751e81 100755
--- a/roles/openshift_facts/library/openshift_facts.py
+++ b/roles/openshift_facts/library/openshift_facts.py
@@ -477,11 +477,7 @@ def set_selectors(facts):
facts if they were not already present
"""
- deployment_type = facts['common']['deployment_type']
- if deployment_type == 'online':
- selector = "type=infra"
- else:
- selector = "region=infra"
+ selector = "region=infra"
if 'hosted' not in facts:
facts['hosted'] = {}
@@ -568,7 +564,7 @@ def set_identity_providers_if_unset(facts):
name='allow_all', challenge=True, login=True,
kind='AllowAllPasswordIdentityProvider'
)
- if deployment_type in ['enterprise', 'atomic-enterprise', 'openshift-enterprise']:
+ if deployment_type == 'openshift-enterprise':
identity_provider = dict(
name='deny_all', challenge=True, login=True,
kind='DenyAllPasswordIdentityProvider'
@@ -770,13 +766,11 @@ def set_deployment_facts_if_unset(facts):
service_type = 'atomic-openshift'
if deployment_type == 'origin':
service_type = 'origin'
- elif deployment_type in ['enterprise']:
- service_type = 'openshift'
facts['common']['service_type'] = service_type
if 'docker' in facts:
deployment_type = facts['common']['deployment_type']
- if deployment_type in ['enterprise', 'atomic-enterprise', 'openshift-enterprise']:
+ if deployment_type == 'openshift-enterprise':
addtl_regs = facts['docker'].get('additional_registries', [])
ent_reg = 'registry.access.redhat.com'
if ent_reg not in addtl_regs:
@@ -787,30 +781,21 @@ def set_deployment_facts_if_unset(facts):
deployment_type = facts['common']['deployment_type']
if 'registry_url' not in facts[role]:
registry_url = 'openshift/origin-${component}:${version}'
- if deployment_type in ['enterprise', 'online', 'openshift-enterprise']:
+ if deployment_type == 'openshift-enterprise':
registry_url = 'openshift3/ose-${component}:${version}'
- elif deployment_type == 'atomic-enterprise':
- registry_url = 'aep3_beta/aep-${component}:${version}'
facts[role]['registry_url'] = registry_url
if 'master' in facts:
deployment_type = facts['common']['deployment_type']
openshift_features = ['Builder', 'S2IBuilder', 'WebConsole']
- if 'disabled_features' in facts['master']:
- if deployment_type == 'atomic-enterprise':
- curr_disabled_features = set(facts['master']['disabled_features'])
- facts['master']['disabled_features'] = list(curr_disabled_features.union(openshift_features))
- else:
+ if 'disabled_features' not in facts['master']:
if facts['common']['deployment_subtype'] == 'registry':
facts['master']['disabled_features'] = openshift_features
if 'node' in facts:
deployment_type = facts['common']['deployment_type']
if 'storage_plugin_deps' not in facts['node']:
- if deployment_type in ['openshift-enterprise', 'atomic-enterprise', 'origin']:
- facts['node']['storage_plugin_deps'] = ['ceph', 'glusterfs', 'iscsi']
- else:
- facts['node']['storage_plugin_deps'] = []
+ facts['node']['storage_plugin_deps'] = ['ceph', 'glusterfs', 'iscsi']
return facts
@@ -1671,7 +1656,7 @@ def set_container_facts_if_unset(facts):
facts
"""
deployment_type = facts['common']['deployment_type']
- if deployment_type in ['enterprise', 'openshift-enterprise']:
+ if deployment_type == 'openshift-enterprise':
master_image = 'openshift3/ose'
cli_image = master_image
node_image = 'openshift3/node'
@@ -1681,16 +1666,6 @@ def set_container_facts_if_unset(facts):
router_image = 'openshift3/ose-haproxy-router'
registry_image = 'openshift3/ose-docker-registry'
deployer_image = 'openshift3/ose-deployer'
- elif deployment_type == 'atomic-enterprise':
- master_image = 'aep3_beta/aep'
- cli_image = master_image
- node_image = 'aep3_beta/node'
- ovs_image = 'aep3_beta/openvswitch'
- etcd_image = 'registry.access.redhat.com/rhel7/etcd'
- pod_image = 'aep3_beta/aep-pod'
- router_image = 'aep3_beta/aep-haproxy-router'
- registry_image = 'aep3_beta/aep-docker-registry'
- deployer_image = 'aep3_beta/aep-deployer'
else:
master_image = 'openshift/origin'
cli_image = master_image
diff --git a/roles/openshift_gcp/tasks/main.yaml b/roles/openshift_gcp/tasks/main.yaml
new file mode 100644
index 000000000..ad205ba33
--- /dev/null
+++ b/roles/openshift_gcp/tasks/main.yaml
@@ -0,0 +1,43 @@
+#
+# This role relies on gcloud invoked via templated bash in order to
+# provide a high performance deployment option. The next logical step
+# is to transition to a deployment manager template which is then instantiated.
+# TODO: use a formal set of role parameters consistent with openshift_aws
+#
+---
+- name: Templatize DNS script
+ template: src=dns.j2.sh dest=/tmp/openshift_gcp_provision_dns.sh mode=u+rx
+- name: Templatize provision script
+ template: src=provision.j2.sh dest=/tmp/openshift_gcp_provision.sh mode=u+rx
+- name: Templatize de-provision script
+ template: src=remove.j2.sh dest=/tmp/openshift_gcp_provision_remove.sh mode=u+rx
+ when:
+ - state | default('present') == 'absent'
+
+- name: Provision GCP DNS domain
+ command: /tmp/openshift_gcp_provision_dns.sh
+ args:
+ chdir: "{{ playbook_dir }}/files"
+ register: dns_provision
+ when:
+ - state | default('present') == 'present'
+
+- name: Ensure that DNS resolves to the hosted zone
+ assert:
+ that:
+ - "lookup('dig', public_hosted_zone, 'qtype=NS', wantlist=True) | sort | join(',') == dns_provision.stdout"
+ msg: "The DNS domain {{ public_hosted_zone }} defined in 'public_hosted_zone' must have NS records pointing to the Google nameservers: '{{ dns_provision.stdout }}' instead of '{{ lookup('dig', public_hosted_zone, 'qtype=NS') }}'."
+ when:
+ - state | default('present') == 'present'
+
+- name: Provision GCP resources
+ command: /tmp/openshift_gcp_provision.sh
+ args:
+ chdir: "{{ playbook_dir }}/files"
+ when:
+ - state | default('present') == 'present'
+
+- name: De-provision GCP resources
+ command: /tmp/openshift_gcp_provision_remove.sh
+ when:
+ - state | default('present') == 'absent'
diff --git a/roles/openshift_gcp/templates/dns.j2.sh b/roles/openshift_gcp/templates/dns.j2.sh
new file mode 100644
index 000000000..eacf84b4d
--- /dev/null
+++ b/roles/openshift_gcp/templates/dns.j2.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -euo pipefail
+
+dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}"
+
+# Check the DNS managed zone in Google Cloud DNS, create it if it doesn't exist
+if ! gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" dns managed-zones create "${dns_zone}" --dns-name "{{ public_hosted_zone }}" --description "{{ public_hosted_zone }} domain" >/dev/null
+fi
+
+# Always output the expected nameservers as a comma delimited list
+gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" --format='value(nameServers)' | tr ';' ','
diff --git a/roles/openshift_gcp/templates/provision.j2.sh b/roles/openshift_gcp/templates/provision.j2.sh
new file mode 100644
index 000000000..e68e9683f
--- /dev/null
+++ b/roles/openshift_gcp/templates/provision.j2.sh
@@ -0,0 +1,318 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Create SSH key for GCE
+if [ ! -f "{{ gce_ssh_private_key }}" ]; then
+ ssh-keygen -t rsa -f "{{ gce_ssh_private_key }}" -C gce-provision-cloud-user -N ''
+ ssh-add "{{ gce_ssh_private_key }}" || true
+fi
+
+# Check if the ~/.ssh/google_compute_engine.pub key is in the project metadata, and if not, add it there
+pub_key=$(cut -d ' ' -f 2 < "{{ gce_ssh_private_key }}.pub")
+key_tmp_file='/tmp/ocp-gce-keys'
+if ! gcloud --project "{{ gce_project_id }}" compute project-info describe | grep -q "$pub_key"; then
+ if gcloud --project "{{ gce_project_id }}" compute project-info describe | grep -q ssh-rsa; then
+ gcloud --project "{{ gce_project_id }}" compute project-info describe | grep ssh-rsa | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e 's/value: //' > "$key_tmp_file"
+ fi
+ echo -n 'cloud-user:' >> "$key_tmp_file"
+ cat "{{ gce_ssh_private_key }}.pub" >> "$key_tmp_file"
+ gcloud --project "{{ gce_project_id }}" compute project-info add-metadata --metadata-from-file "sshKeys=${key_tmp_file}"
+ rm -f "$key_tmp_file"
+fi
+
+metadata=""
+if [[ -n "{{ provision_gce_startup_script_file }}" ]]; then
+ if [[ ! -f "{{ provision_gce_startup_script_file }}" ]]; then
+ echo "Startup script file missing at {{ provision_gce_startup_script_file }} from=$(pwd)"
+ exit 1
+ fi
+ metadata+="--metadata-from-file=startup-script={{ provision_gce_startup_script_file }}"
+fi
+if [[ -n "{{ provision_gce_user_data_file }}" ]]; then
+ if [[ ! -f "{{ provision_gce_user_data_file }}" ]]; then
+ echo "User data file missing at {{ provision_gce_user_data_file }}"
+ exit 1
+ fi
+ if [[ -n "${metadata}" ]]; then
+ metadata+=","
+ else
+ metadata="--metadata-from-file="
+ fi
+ metadata+="user-data={{ provision_gce_user_data_file }}"
+fi
+
+# Select image or image family
+image="{{ provision_gce_registered_image }}"
+if ! gcloud --project "{{ gce_project_id }}" compute images describe "${image}" &>/dev/null; then
+ if ! gcloud --project "{{ gce_project_id }}" compute images describe-from-family "${image}" &>/dev/null; then
+ echo "No compute image or image-family found, create an image named '{{ provision_gce_registered_image }}' to continue'"
+ exit 1
+ fi
+ image="family/${image}"
+fi
+
+### PROVISION THE INFRASTRUCTURE ###
+
+dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}"
+
+# Check the DNS managed zone in Google Cloud DNS, create it if it doesn't exist and exit after printing NS servers
+if ! gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then
+ echo "DNS zone '${dns_zone}' doesn't exist. Must be configured prior to running this script"
+ exit 1
+fi
+
+# Create network
+if ! gcloud --project "{{ gce_project_id }}" compute networks describe "{{ gce_network_name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute networks create "{{ gce_network_name }}" --mode "auto"
+else
+ echo "Network '{{ gce_network_name }}' already exists"
+fi
+
+# Firewall rules in a form:
+# ['name']='parameters for "gcloud compute firewall-rules create"'
+# For all possible parameters see: gcloud compute firewall-rules create --help
+range=""
+if [[ -n "{{ openshift_node_port_range }}" ]]; then
+ range=",tcp:{{ openshift_node_port_range }},udp:{{ openshift_node_port_range }}"
+fi
+declare -A FW_RULES=(
+ ['icmp']='--allow icmp'
+ ['ssh-external']='--allow tcp:22'
+ ['ssh-internal']='--allow tcp:22 --source-tags bastion'
+ ['master-internal']="--allow tcp:2224,tcp:2379,tcp:2380,tcp:4001,udp:4789,udp:5404,udp:5405,tcp:8053,udp:8053,tcp:8444,tcp:10250,tcp:10255,udp:10255,tcp:24224,udp:24224 --source-tags ocp --target-tags ocp-master"
+ ['master-external']="--allow tcp:80,tcp:443,tcp:1936,tcp:8080,tcp:8443${range} --target-tags ocp-master"
+ ['node-internal']="--allow udp:4789,tcp:10250,tcp:10255,udp:10255 --source-tags ocp --target-tags ocp-node,ocp-infra-node"
+ ['infra-node-internal']="--allow tcp:5000 --source-tags ocp --target-tags ocp-infra-node"
+ ['infra-node-external']="--allow tcp:80,tcp:443,tcp:1936${range} --target-tags ocp-infra-node"
+)
+for rule in "${!FW_RULES[@]}"; do
+ ( if ! gcloud --project "{{ gce_project_id }}" compute firewall-rules describe "{{ provision_prefix }}$rule" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute firewall-rules create "{{ provision_prefix }}$rule" --network "{{ gce_network_name }}" ${FW_RULES[$rule]}
+ else
+ echo "Firewall rule '{{ provision_prefix }}${rule}' already exists"
+ fi ) &
+done
+
+
+# Master IP
+( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}master-ssl-lb-ip" --global
+else
+ echo "IP '{{ provision_prefix }}master-ssl-lb-ip' already exists"
+fi ) &
+
+# Internal master IP
+( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}"
+else
+ echo "IP '{{ provision_prefix }}master-network-lb-ip' already exists"
+fi ) &
+
+# Router IP
+( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}"
+else
+ echo "IP '{{ provision_prefix }}router-network-lb-ip' already exists"
+fi ) &
+
+
+{% for node_group in provision_gce_node_groups %}
+# configure {{ node_group.name }}
+(
+ if ! gcloud --project "{{ gce_project_id }}" compute instance-templates describe "{{ provision_prefix }}instance-template-{{ node_group.name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute instance-templates create "{{ provision_prefix }}instance-template-{{ node_group.name }}" \
+ --machine-type "{{ node_group.machine_type }}" --network "{{ gce_network_name }}" \
+ --tags "{{ provision_prefix }}ocp,ocp,{{ node_group.tags }}" \
+ --boot-disk-size "{{ node_group.boot_disk_size }}" --boot-disk-type "pd-ssd" \
+ --scopes "logging-write,monitoring-write,useraccounts-ro,service-control,service-management,storage-ro,compute-rw" \
+ --image "${image}" ${metadata}
+ else
+ echo "Instance template '{{ provision_prefix }}instance-template-{{ node_group.name }}' already exists"
+ fi
+
+ # Create instance group
+ if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed describe "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute instance-groups managed create "{{ provision_prefix }}ig-{{ node_group.suffix }}" \
+ --zone "{{ gce_zone_name }}" --template "{{ provision_prefix }}instance-template-{{ node_group.name }}" --size "{{ node_group.scale }}"
+ else
+ echo "Instance group '{{ provision_prefix }}ig-{{ node_group.suffix }}' already exists"
+ fi
+) &
+{% endfor %}
+
+for i in `jobs -p`; do wait $i; done
+
+
+# Configure the master external LB rules
+(
+# Master health check
+if ! gcloud --project "{{ gce_project_id }}" compute health-checks describe "{{ provision_prefix }}master-ssl-lb-health-check" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute health-checks create https "{{ provision_prefix }}master-ssl-lb-health-check" --port "{{ internal_console_port }}" --request-path "/healthz"
+else
+ echo "Health check '{{ provision_prefix }}master-ssl-lb-health-check' already exists"
+fi
+
+gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-named-ports "{{ provision_prefix }}ig-m" \
+ --zone "{{ gce_zone_name }}" --named-ports "{{ provision_prefix }}port-name-master:{{ internal_console_port }}"
+
+# Master backend service
+if ! gcloud --project "{{ gce_project_id }}" compute backend-services describe "{{ provision_prefix }}master-ssl-lb-backend" --global &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute backend-services create "{{ provision_prefix }}master-ssl-lb-backend" --health-checks "{{ provision_prefix }}master-ssl-lb-health-check" --port-name "{{ provision_prefix }}port-name-master" --protocol "TCP" --global --timeout="{{ provision_gce_master_https_timeout | default('2m') }}"
+ gcloud --project "{{ gce_project_id }}" compute backend-services add-backend "{{ provision_prefix }}master-ssl-lb-backend" --instance-group "{{ provision_prefix }}ig-m" --global --instance-group-zone "{{ gce_zone_name }}"
+else
+ echo "Backend service '{{ provision_prefix }}master-ssl-lb-backend' already exists"
+fi
+
+# Master tcp proxy target
+if ! gcloud --project "{{ gce_project_id }}" compute target-tcp-proxies describe "{{ provision_prefix }}master-ssl-lb-target" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute target-tcp-proxies create "{{ provision_prefix }}master-ssl-lb-target" --backend-service "{{ provision_prefix }}master-ssl-lb-backend"
+else
+ echo "Proxy target '{{ provision_prefix }}master-ssl-lb-target' already exists"
+fi
+
+# Master forwarding rule
+if ! gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}master-ssl-lb-rule" --global &>/dev/null; then
+ IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global --format='value(address)')
+ gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}master-ssl-lb-rule" --address "$IP" --global --ports "{{ console_port }}" --target-tcp-proxy "{{ provision_prefix }}master-ssl-lb-target"
+else
+ echo "Forwarding rule '{{ provision_prefix }}master-ssl-lb-rule' already exists"
+fi
+) &
+
+
+# Configure the master internal LB rules
+(
+# Internal master health check
+if ! gcloud --project "{{ gce_project_id }}" compute http-health-checks describe "{{ provision_prefix }}master-network-lb-health-check" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute http-health-checks create "{{ provision_prefix }}master-network-lb-health-check" --port "8080" --request-path "/healthz"
+else
+ echo "Health check '{{ provision_prefix }}master-network-lb-health-check' already exists"
+fi
+
+# Internal master target pool
+if ! gcloud --project "{{ gce_project_id }}" compute target-pools describe "{{ provision_prefix }}master-network-lb-pool" --region "{{ gce_region_name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute target-pools create "{{ provision_prefix }}master-network-lb-pool" --http-health-check "{{ provision_prefix }}master-network-lb-health-check" --region "{{ gce_region_name }}"
+else
+ echo "Target pool '{{ provision_prefix }}master-network-lb-pool' already exists"
+fi
+
+# Internal master forwarding rule
+if ! gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}master-network-lb-rule" --region "{{ gce_region_name }}" &>/dev/null; then
+ IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)')
+ gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}master-network-lb-rule" --address "$IP" --region "{{ gce_region_name }}" --target-pool "{{ provision_prefix }}master-network-lb-pool"
+else
+ echo "Forwarding rule '{{ provision_prefix }}master-network-lb-rule' already exists"
+fi
+) &
+
+
+# Configure the infra node rules
+(
+# Router health check
+if ! gcloud --project "{{ gce_project_id }}" compute http-health-checks describe "{{ provision_prefix }}router-network-lb-health-check" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute http-health-checks create "{{ provision_prefix }}router-network-lb-health-check" --port "1936" --request-path "/healthz"
+else
+ echo "Health check '{{ provision_prefix }}router-network-lb-health-check' already exists"
+fi
+
+# Router target pool
+if ! gcloud --project "{{ gce_project_id }}" compute target-pools describe "{{ provision_prefix }}router-network-lb-pool" --region "{{ gce_region_name }}" &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" compute target-pools create "{{ provision_prefix }}router-network-lb-pool" --http-health-check "{{ provision_prefix }}router-network-lb-health-check" --region "{{ gce_region_name }}"
+else
+ echo "Target pool '{{ provision_prefix }}router-network-lb-pool' already exists"
+fi
+
+# Router forwarding rule
+if ! gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}router-network-lb-rule" --region "{{ gce_region_name }}" &>/dev/null; then
+ IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)')
+ gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}router-network-lb-rule" --address "$IP" --region "{{ gce_region_name }}" --target-pool "{{ provision_prefix }}router-network-lb-pool"
+else
+ echo "Forwarding rule '{{ provision_prefix }}router-network-lb-rule' already exists"
+fi
+) &
+
+for i in `jobs -p`; do wait $i; done
+
+# set the target pools
+(
+if [[ "ig-m" == "{{ provision_gce_router_network_instance_group }}" ]]; then
+ gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}ig-m" --target-pools "{{ provision_prefix }}master-network-lb-pool,{{ provision_prefix }}router-network-lb-pool" --zone "{{ gce_zone_name }}"
+else
+ gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}ig-m" --target-pools "{{ provision_prefix }}master-network-lb-pool" --zone "{{ gce_zone_name }}"
+ gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}{{ provision_gce_router_network_instance_group }}" --target-pools "{{ provision_prefix }}router-network-lb-pool" --zone "{{ gce_zone_name }}"
+fi
+) &
+
+# configure DNS
+(
+# Retry DNS changes until they succeed since this may be a shared resource
+while true; do
+ dns="${TMPDIR:-/tmp}/dns.yaml"
+ rm -f $dns
+
+ # DNS record for master lb
+ if ! gcloud --project "{{ gce_project_id }}" dns record-sets list -z "${dns_zone}" --name "{{ openshift_master_cluster_public_hostname }}" 2>/dev/null | grep -q "{{ openshift_master_cluster_public_hostname }}"; then
+ IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global --format='value(address)')
+ if [[ ! -f $dns ]]; then
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}"
+ fi
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "{{ openshift_master_cluster_public_hostname }}." --type A "$IP"
+ else
+ echo "DNS record for '{{ openshift_master_cluster_public_hostname }}' already exists"
+ fi
+
+ # DNS record for internal master lb
+ if ! gcloud --project "{{ gce_project_id }}" dns record-sets list -z "${dns_zone}" --name "{{ openshift_master_cluster_hostname }}" 2>/dev/null | grep -q "{{ openshift_master_cluster_hostname }}"; then
+ IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)')
+ if [[ ! -f $dns ]]; then
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}"
+ fi
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "{{ openshift_master_cluster_hostname }}." --type A "$IP"
+ else
+ echo "DNS record for '{{ openshift_master_cluster_hostname }}' already exists"
+ fi
+
+ # DNS record for router lb
+ if ! gcloud --project "{{ gce_project_id }}" dns record-sets list -z "${dns_zone}" --name "{{ wildcard_zone }}" 2>/dev/null | grep -q "{{ wildcard_zone }}"; then
+ IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)')
+ if [[ ! -f $dns ]]; then
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}"
+ fi
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "{{ wildcard_zone }}." --type A "$IP"
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "*.{{ wildcard_zone }}." --type CNAME "{{ wildcard_zone }}."
+ else
+ echo "DNS record for '{{ wildcard_zone }}' already exists"
+ fi
+
+ # Commit all DNS changes, retrying if preconditions are not met
+ if [[ -f $dns ]]; then
+ if ! out="$( gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns execute -z "${dns_zone}" 2>&1 )"; then
+ rc=$?
+ if [[ "${out}" == *"HTTPError 412: Precondition not met"* ]]; then
+ continue
+ fi
+ exit $rc
+ fi
+ fi
+ break
+done
+) &
+
+# Create bucket for registry
+(
+if ! gsutil ls -p "{{ gce_project_id }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" &>/dev/null; then
+ gsutil mb -p "{{ gce_project_id }}" -l "{{ gce_region_name }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}"
+else
+ echo "Bucket '{{ openshift_hosted_registry_storage_gcs_bucket }}' already exists"
+fi
+) &
+
+# wait until all node groups are stable
+{% for node_group in provision_gce_node_groups %}
+# wait for stable {{ node_group.name }}
+( gcloud --project "{{ gce_project_id }}" compute instance-groups managed wait-until-stable "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --timeout=300) &
+{% endfor %}
+
+
+for i in `jobs -p`; do wait $i; done
diff --git a/roles/openshift_gcp/templates/remove.j2.sh b/roles/openshift_gcp/templates/remove.j2.sh
new file mode 100644
index 000000000..41ceab2b5
--- /dev/null
+++ b/roles/openshift_gcp/templates/remove.j2.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+set -euo pipefail
+
+function teardown_cmd() {
+ a=( $@ )
+ local name=$1
+ a=( "${a[@]:1}" )
+ local flag=0
+ local found=
+ for i in ${a[@]}; do
+ if [[ "$i" == "--"* ]]; then
+ found=true
+ break
+ fi
+ flag=$((flag+1))
+ done
+ if [[ -z "${found}" ]]; then
+ flag=$((flag+1))
+ fi
+ if gcloud --project "{{ gce_project_id }}" ${a[@]::$flag} describe "${name}" ${a[@]:$flag} &>/dev/null; then
+ gcloud --project "{{ gce_project_id }}" ${a[@]::$flag} delete -q "${name}" ${a[@]:$flag}
+ fi
+}
+
+function teardown() {
+ for i in `seq 1 20`; do
+ if teardown_cmd $@; then
+ break
+ fi
+ sleep 0.5
+ done
+}
+
+# Preemptively spin down the instances
+{% for node_group in provision_gce_node_groups %}
+# scale down {{ node_group.name }}
+(
+ # performs a delete and scale down as one operation to ensure maximum parallelism
+ if ! instances=$( gcloud --project "{{ gce_project_id }}" compute instance-groups managed list-instances "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --format='value[terminator=","](instance)' ); then
+ exit 0
+ fi
+ instances="${instances%?}"
+ if [[ -z "${instances}" ]]; then
+ echo "warning: No instances in {{ node_group.name }}" 1>&2
+ exit 0
+ fi
+ if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed delete-instances "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --instances "${instances}"; then
+ echo "warning: Unable to scale down the node group {{ node_group.name }}" 1>&2
+ exit 0
+ fi
+) &
+{% endfor %}
+
+# Bucket for registry
+(
+if gsutil ls -p "{{ gce_project_id }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" &>/dev/null; then
+ gsutil -m rm -r "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}"
+fi
+) &
+
+# DNS
+(
+dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}"
+if gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then
+ # Retry DNS changes until they succeed since this may be a shared resource
+ while true; do
+ dns="${TMPDIR:-/tmp}/dns.yaml"
+ rm -f "${dns}"
+
+ # export all dns records that match into a zone format, and turn each line into a set of args for
+ # record-sets transaction.
+ gcloud dns record-sets export --project "{{ gce_project_id }}" -z "${dns_zone}" --zone-file-format "${dns}"
+ if grep -F -e '{{ openshift_master_cluster_hostname }}' -e '{{ openshift_master_cluster_public_hostname }}' -e '{{ wildcard_zone }}' "${dns}" | \
+ awk '{ print "--name", $1, "--ttl", $2, "--type", $4, $5; }' > "${dns}.input"
+ then
+ rm -f "${dns}"
+ gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}"
+ cat "${dns}.input" | xargs -L1 gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file="${dns}" remove -z "${dns_zone}"
+
+ # Commit all DNS changes, retrying if preconditions are not met
+ if ! out="$( gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns execute -z "${dns_zone}" 2>&1 )"; then
+ rc=$?
+ if [[ "${out}" == *"HTTPError 412: Precondition not met"* ]]; then
+ continue
+ fi
+ exit $rc
+ fi
+ fi
+ rm "${dns}.input"
+ break
+ done
+fi
+) &
+
+(
+# Router network rules
+teardown "{{ provision_prefix }}router-network-lb-rule" compute forwarding-rules --region "{{ gce_region_name }}"
+teardown "{{ provision_prefix }}router-network-lb-pool" compute target-pools --region "{{ gce_region_name }}"
+teardown "{{ provision_prefix }}router-network-lb-health-check" compute http-health-checks
+teardown "{{ provision_prefix }}router-network-lb-ip" compute addresses --region "{{ gce_region_name }}"
+
+# Internal master network rules
+teardown "{{ provision_prefix }}master-network-lb-rule" compute forwarding-rules --region "{{ gce_region_name }}"
+teardown "{{ provision_prefix }}master-network-lb-pool" compute target-pools --region "{{ gce_region_name }}"
+teardown "{{ provision_prefix }}master-network-lb-health-check" compute http-health-checks
+teardown "{{ provision_prefix }}master-network-lb-ip" compute addresses --region "{{ gce_region_name }}"
+) &
+
+(
+# Master SSL network rules
+teardown "{{ provision_prefix }}master-ssl-lb-rule" compute forwarding-rules --global
+teardown "{{ provision_prefix }}master-ssl-lb-target" compute target-tcp-proxies
+teardown "{{ provision_prefix }}master-ssl-lb-ip" compute addresses --global
+teardown "{{ provision_prefix }}master-ssl-lb-backend" compute backend-services --global
+teardown "{{ provision_prefix }}master-ssl-lb-health-check" compute health-checks
+) &
+
+#Firewall rules
+#['name']='parameters for "gcloud compute firewall-rules create"'
+#For all possible parameters see: gcloud compute firewall-rules create --help
+declare -A FW_RULES=(
+ ['icmp']=""
+ ['ssh-external']=""
+ ['ssh-internal']=""
+ ['master-internal']=""
+ ['master-external']=""
+ ['node-internal']=""
+ ['infra-node-internal']=""
+ ['infra-node-external']=""
+)
+for rule in "${!FW_RULES[@]}"; do
+ ( if gcloud --project "{{ gce_project_id }}" compute firewall-rules describe "{{ provision_prefix }}$rule" &>/dev/null; then
+ # retry a few times because this call can be flaky
+ for i in `seq 1 3`; do
+ if gcloud -q --project "{{ gce_project_id }}" compute firewall-rules delete "{{ provision_prefix }}$rule"; then
+ break
+ fi
+ done
+ fi ) &
+done
+
+for i in `jobs -p`; do wait $i; done
+
+{% for node_group in provision_gce_node_groups %}
+# teardown {{ node_group.name }} - any load balancers referencing these groups must be removed
+(
+ teardown "{{ provision_prefix }}ig-{{ node_group.suffix }}" compute instance-groups managed --zone "{{ gce_zone_name }}"
+ teardown "{{ provision_prefix }}instance-template-{{ node_group.name }}" compute instance-templates
+) &
+{% endfor %}
+
+for i in `jobs -p`; do wait $i; done
+
+# Network
+teardown "{{ gce_network_name }}" compute networks
diff --git a/roles/openshift_gcp_image_prep/files/partition.conf b/roles/openshift_gcp_image_prep/files/partition.conf
new file mode 100644
index 000000000..b87e5e0b6
--- /dev/null
+++ b/roles/openshift_gcp_image_prep/files/partition.conf
@@ -0,0 +1,3 @@
+[Service]
+ExecStartPost=-/usr/bin/growpart /dev/sda 1
+ExecStartPost=-/sbin/xfs_growfs /
diff --git a/roles/openshift_gcp_image_prep/tasks/main.yaml b/roles/openshift_gcp_image_prep/tasks/main.yaml
new file mode 100644
index 000000000..fee5ab618
--- /dev/null
+++ b/roles/openshift_gcp_image_prep/tasks/main.yaml
@@ -0,0 +1,18 @@
+---
+# GCE instances are starting with xfs AND barrier=1, which is only for extfs.
+- name: Remove barrier=1 from XFS fstab entries
+ lineinfile:
+ path: /etc/fstab
+ regexp: '^(.+)xfs(.+?),?barrier=1,?(.*?)$'
+ line: '\1xfs\2 \4'
+ backrefs: yes
+
+- name: Ensure the root filesystem has XFS group quota turned on
+ lineinfile:
+ path: /boot/grub2/grub.cfg
+ regexp: '^(.*)linux16 (.*)$'
+ line: '\1linux16 \2 rootflags=gquota'
+ backrefs: yes
+
+- name: Ensure the root partition grows on startup
+ copy: src=partition.conf dest=/etc/systemd/system/google-instance-setup.service.d/
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index d02a43655..326176273 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -3,7 +3,10 @@ Ansible action plugin to execute health checks in OpenShift clusters.
"""
import sys
import os
+import base64
import traceback
+import errno
+import json
from collections import defaultdict
from ansible.plugins.action import ActionBase
@@ -38,8 +41,13 @@ class ActionModule(ActionBase):
# storing the information we need in the result.
result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context')
+ # if the user wants to write check results to files, they provide this directory:
+ output_dir = task_vars.get("openshift_checks_output_dir")
+ if output_dir:
+ output_dir = os.path.join(output_dir, task_vars["ansible_host"])
+
try:
- known_checks = self.load_known_checks(tmp, task_vars)
+ known_checks = self.load_known_checks(tmp, task_vars, output_dir)
args = self._task.args
requested_checks = normalize(args.get('checks', []))
@@ -65,21 +73,20 @@ class ActionModule(ActionBase):
for name in resolved_checks:
display.banner("CHECK [{} : {}]".format(name, task_vars["ansible_host"]))
- check = known_checks[name]
- check_results[name] = run_check(name, check, user_disabled_checks)
- if check.changed:
- check_results[name]["changed"] = True
+ check_results[name] = run_check(name, known_checks[name], user_disabled_checks, output_dir)
result["changed"] = any(r.get("changed") for r in check_results.values())
if any(r.get("failed") for r in check_results.values()):
result["failed"] = True
result["msg"] = "One or more checks failed"
+ write_result_to_output_dir(output_dir, result)
return result
- def load_known_checks(self, tmp, task_vars):
+ def load_known_checks(self, tmp, task_vars, output_dir=None):
"""Find all existing checks and return a mapping of names to instances."""
load_checks()
+ want_full_results = bool(output_dir)
known_checks = {}
for cls in OpenShiftCheck.subclasses():
@@ -90,7 +97,12 @@ class ActionModule(ActionBase):
"duplicate check name '{}' in: '{}' and '{}'"
"".format(name, full_class_name(cls), full_class_name(other_cls))
)
- known_checks[name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars)
+ known_checks[name] = cls(
+ execute_module=self._execute_module,
+ tmp=tmp,
+ task_vars=task_vars,
+ want_full_results=want_full_results
+ )
return known_checks
@@ -185,8 +197,10 @@ def normalize(checks):
return [name.strip() for name in checks if name.strip()]
-def run_check(name, check, user_disabled_checks):
+def run_check(name, check, user_disabled_checks, output_dir=None):
"""Run a single check if enabled and return a result dict."""
+
+ # determine if we're going to run the check (not inactive or disabled)
if name in user_disabled_checks or '*' in user_disabled_checks:
return dict(skipped=True, skipped_reason="Disabled by user request")
@@ -201,12 +215,134 @@ def run_check(name, check, user_disabled_checks):
if not is_active:
return dict(skipped=True, skipped_reason="Not active for this host")
+ # run the check
+ result = {}
try:
- return check.run()
+ result = check.run()
except OpenShiftCheckException as exc:
- return dict(failed=True, msg=str(exc))
+ check.register_failure(exc)
+ except Exception as exc:
+ check.register_failure("\n".join([str(exc), traceback.format_exc()]))
+
+ # process the check state; compose the result hash, write files as needed
+ if check.changed:
+ result["changed"] = True
+ if check.failures or result.get("failed"):
+ if "msg" in result: # failure result has msg; combine with any registered failures
+ check.register_failure(result.get("msg"))
+ result["failures"] = [(fail.name, str(fail)) for fail in check.failures]
+ result["failed"] = True
+ result["msg"] = "\n".join(str(fail) for fail in check.failures)
+ write_to_output_file(output_dir, name + ".failures.json", result["failures"])
+ if check.logs:
+ write_to_output_file(output_dir, name + ".log.json", check.logs)
+ if check.files_to_save:
+ write_files_to_save(output_dir, check)
+
+ return result
+
+
+def prepare_output_dir(dirname):
+ """Create the directory, including parents. Return bool for success/failure."""
+ try:
+ os.makedirs(dirname)
+ return True
+ except OSError as exc:
+ # trying to create existing dir leads to error;
+ # that error is fine, but for any other, assume the dir is not there
+ return exc.errno == errno.EEXIST
+
+
+def copy_remote_file_to_dir(check, file_to_save, output_dir, fname):
+ """Copy file from remote host to local file in output_dir, if given."""
+ if not output_dir or not prepare_output_dir(output_dir):
+ return
+ local_file = os.path.join(output_dir, fname)
+
+ # pylint: disable=broad-except; do not need to do anything about failure to write dir/file
+ # and do not want exceptions to break anything.
+ try:
+ # NOTE: it would have been nice to copy the file directly without loading it into
+ # memory, but there does not seem to be a good way to do this via ansible.
+ result = check.execute_module("slurp", dict(src=file_to_save), register=False)
+ if result.get("failed"):
+ display.warning("Could not retrieve file {}: {}".format(file_to_save, result.get("msg")))
+ return
+
+ content = result["content"]
+ if result.get("encoding") == "base64":
+ content = base64.b64decode(content)
+ with open(local_file, "wb") as outfile:
+ outfile.write(content)
+ except Exception as exc:
+ display.warning("Failed writing remote {} to local {}: {}".format(file_to_save, local_file, exc))
+ return
+
+
+def _no_fail(obj):
+ # pylint: disable=broad-except; do not want serialization to fail for any reason
+ try:
+ return str(obj)
+ except Exception:
+ return "[not serializable]"
+
+
+def write_to_output_file(output_dir, filename, data):
+ """If output_dir provided, write data to file. Serialize as JSON if data is not a string."""
+
+ if not output_dir or not prepare_output_dir(output_dir):
+ return
+ filename = os.path.join(output_dir, filename)
+ try:
+ with open(filename, 'w') as outfile:
+ if isinstance(data, string_types):
+ outfile.write(data)
+ else:
+ json.dump(data, outfile, sort_keys=True, indent=4, default=_no_fail)
+ # pylint: disable=broad-except; do not want serialization/write to break for any reason
+ except Exception as exc:
+ display.warning("Could not write output file {}: {}".format(filename, exc))
+
+
+def write_result_to_output_dir(output_dir, result):
+ """If output_dir provided, write the result as json to result.json.
+
+ Success/failure of the write is recorded as "output_files" in the result hash afterward.
+ Otherwise this is much like write_to_output_file.
+ """
+
+ if not output_dir:
+ return
+ if not prepare_output_dir(output_dir):
+ result["output_files"] = "Error creating output directory " + output_dir
+ return
+
+ filename = os.path.join(output_dir, "result.json")
+ try:
+ with open(filename, 'w') as outfile:
+ json.dump(result, outfile, sort_keys=True, indent=4, default=_no_fail)
+ result["output_files"] = "Check results for this host written to " + filename
+ # pylint: disable=broad-except; do not want serialization/write to break for any reason
except Exception as exc:
- return dict(failed=True, msg=str(exc), exception=traceback.format_exc())
+ result["output_files"] = "Error writing check results to {}:\n{}".format(filename, exc)
+
+
+def write_files_to_save(output_dir, check):
+ """Write files to check subdir in output dir."""
+ if not output_dir:
+ return
+ output_dir = os.path.join(output_dir, check.name)
+ seen_file = defaultdict(lambda: 0)
+ for file_to_save in check.files_to_save:
+ fname = file_to_save.filename
+ while seen_file[fname]: # just to be sure we never re-write a file, append numbers as needed
+ seen_file[fname] += 1
+ fname = "{}.{}".format(fname, seen_file[fname])
+ seen_file[fname] += 1
+ if file_to_save.remote_filename:
+ copy_remote_file_to_dir(check, file_to_save.remote_filename, output_dir, fname)
+ else:
+ write_to_output_file(output_dir, fname, file_to_save.contents)
def full_class_name(cls):
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 987c955b6..28cb53cc5 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -2,9 +2,11 @@
Health checks for OpenShift clusters.
"""
+import json
import operator
import os
import time
+import collections
from abc import ABCMeta, abstractmethod, abstractproperty
from importlib import import_module
@@ -28,7 +30,7 @@ class OpenShiftCheckException(Exception):
class OpenShiftCheckExceptionList(OpenShiftCheckException):
- """A container for multiple logging errors that may be detected in one check."""
+ """A container for multiple errors that may be detected in one check."""
def __init__(self, errors):
self.errors = errors
super(OpenShiftCheckExceptionList, self).__init__(
@@ -41,29 +43,53 @@ class OpenShiftCheckExceptionList(OpenShiftCheckException):
return self.errors[index]
+FileToSave = collections.namedtuple("FileToSave", "filename contents remote_filename")
+
+
+# pylint: disable=too-many-instance-attributes; all represent significantly different state.
+# Arguably they could be separated into two hashes, one for storing parameters, and one for
+# storing result state; but that smells more like clutter than clarity.
@six.add_metaclass(ABCMeta)
class OpenShiftCheck(object):
- """
- A base class for defining checks for an OpenShift cluster environment.
+ """A base class for defining checks for an OpenShift cluster environment.
- Expect optional params: method execute_module, dict task_vars, and string tmp.
+ Optional init params: method execute_module, dict task_vars, and string tmp
execute_module is expected to have a signature compatible with _execute_module
from ansible plugins/action/__init__.py, e.g.:
def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *args):
This is stored so that it can be invoked in subclasses via check.execute_module("name", args)
which provides the check's stored task_vars and tmp.
+
+ Optional init param: want_full_results
+ If the check can gather logs, tarballs, etc., do so when True; but no need to spend
+ the time if they're not wanted (won't be written to output directory).
"""
- def __init__(self, execute_module=None, task_vars=None, tmp=None):
+ def __init__(self, execute_module=None, task_vars=None, tmp=None, want_full_results=False):
+ # store a method for executing ansible modules from the check
self._execute_module = execute_module
+ # the task variables and tmpdir passed into the health checker task
self.task_vars = task_vars or {}
self.tmp = tmp
+ # a boolean for disabling the gathering of results (files, computations) that won't
+ # actually be recorded/used
+ self.want_full_results = want_full_results
+
# mainly for testing purposes; see execute_module_with_retries
self._module_retries = 3
self._module_retry_interval = 5 # seconds
+ # state to be recorded for inspection after the check runs:
+ #
# set to True when the check changes the host, for accurate total "changed" count
self.changed = False
+ # list of OpenShiftCheckException for check to report (alternative to returning a failed result)
+ self.failures = []
+ # list of FileToSave - files the check specifies to be written locally if so configured
+ self.files_to_save = []
+ # log messages for the check - tuples of (description, msg) where msg is serializable.
+ # These are intended to be a sequential record of what the check observed and determined.
+ self.logs = []
@abstractproperty
def name(self):
@@ -86,7 +112,13 @@ class OpenShiftCheck(object):
@abstractmethod
def run(self):
- """Executes a check, normally implemented as a module."""
+ """Executes a check against a host and returns a result hash similar to Ansible modules.
+
+ Actually the direction ahead is to record state in the attributes and
+ not bother building a result hash. Instead, return an empty hash and let
+ the action plugin fill it in. Or raise an OpenShiftCheckException.
+ Returning a hash may become deprecated if it does not prove necessary.
+ """
return {}
@classmethod
@@ -98,7 +130,43 @@ class OpenShiftCheck(object):
for subclass in subclass.subclasses():
yield subclass
- def execute_module(self, module_name=None, module_args=None):
+ def register_failure(self, error):
+ """Record in the check that a failure occurred.
+
+ Recorded failures are merged into the result hash for now. They are also saved to output directory
+ (if provided) <check>.failures.json and registered as a log entry for context <check>.log.json.
+ """
+ # It should be an exception; make it one if not
+ if not isinstance(error, OpenShiftCheckException):
+ error = OpenShiftCheckException(str(error))
+ self.failures.append(error)
+ # duplicate it in the logs so it can be seen in the context of any
+ # information that led to the failure
+ self.register_log("failure: " + error.name, str(error))
+
+ def register_log(self, context, msg):
+ """Record an entry for the check log.
+
+ Notes are intended to serve as context of the whole sequence of what the check observed.
+ They are be saved as an ordered list in a local check log file.
+ They are not to included in the result or in the ansible log; it's just for the record.
+ """
+ self.logs.append([context, msg])
+
+ def register_file(self, filename, contents=None, remote_filename=""):
+ """Record a file that a check makes available to be saved individually to output directory.
+
+ Either file contents should be passed in, or a file to be copied from the remote host
+ should be specified. Contents that are not a string are to be serialized as JSON.
+
+ NOTE: When copying a file from remote host, it is slurped into memory as base64, meaning
+ you should avoid using this on huge files (more than say 10M).
+ """
+ if contents is None and not remote_filename:
+ raise OpenShiftCheckException("File data/source not specified; this is a bug in the check.")
+ self.files_to_save.append(FileToSave(filename, contents, remote_filename))
+
+ def execute_module(self, module_name=None, module_args=None, save_as_name=None, register=True):
"""Invoke an Ansible module from a check.
Invoke stored _execute_module, normally copied from the action
@@ -110,6 +178,12 @@ class OpenShiftCheck(object):
Ansible version).
So e.g. check.execute_module("foo", dict(arg1=...))
+
+ save_as_name specifies a file name for saving the result to an output directory,
+ if needed, and is intended to uniquely identify the result of invoking execute_module.
+ If not provided, the module name will be used.
+ If register is set False, then the result won't be registered in logs or files to save.
+
Return: result hash from module execution.
"""
if self._execute_module is None:
@@ -117,7 +191,20 @@ class OpenShiftCheck(object):
self.__class__.__name__ +
" invoked execute_module without providing the method at initialization."
)
- return self._execute_module(module_name, module_args, self.tmp, self.task_vars)
+ result = self._execute_module(module_name, module_args, self.tmp, self.task_vars)
+ if result.get("changed"):
+ self.changed = True
+ for output in ["result", "stdout"]:
+ # output is often JSON; attempt to decode
+ try:
+ result[output + "_json"] = json.loads(result[output])
+ except (KeyError, ValueError):
+ pass
+
+ if register:
+ self.register_log("execute_module: " + module_name, result)
+ self.register_file(save_as_name or module_name + ".json", result)
+ return result
def execute_module_with_retries(self, module_name, module_args):
"""Run execute_module and retry on failure."""
@@ -188,8 +275,12 @@ class OpenShiftCheck(object):
'There is a bug in this check. While trying to convert variable \n'
' "{var}={value}"\n'
'the given converter cannot be used or failed unexpectedly:\n'
- '{error}'.format(var=".".join(keys), value=value, error=error)
- )
+ '{type}: {error}'.format(
+ var=".".join(keys),
+ value=value,
+ type=error.__class__.__name__,
+ error=error
+ ))
@staticmethod
def get_major_minor_version(openshift_image_tag):
@@ -231,7 +322,9 @@ class OpenShiftCheck(object):
mount_point = os.path.dirname(mount_point)
try:
- return mount_for_path[mount_point]
+ mount = mount_for_path[mount_point]
+ self.register_log("mount point for " + path, mount)
+ return mount
except KeyError:
known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path))
raise OpenShiftCheckException(
@@ -259,7 +352,7 @@ def load_checks(path=None, subpkg=""):
modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name)
continue
- if name.endswith(".py") and not name.startswith(".") and name not in LOADER_EXCLUDES:
+ if name.endswith(".py") and name not in LOADER_EXCLUDES:
modules.append(import_module(__package__ + subpkg + "." + name[:-3]))
return modules
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index f302fd14b..cdf56e959 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -70,6 +70,10 @@ class DiskAvailability(OpenShiftCheck):
# If it is not a number, then it should be a nested dict.
pass
+ self.register_log("recommended thresholds", self.recommended_disk_space_bytes)
+ if user_config:
+ self.register_log("user-configured thresholds", user_config)
+
# TODO: as suggested in
# https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021,
# maybe we could support checking disk availability in paths that are
@@ -113,10 +117,7 @@ class DiskAvailability(OpenShiftCheck):
'in your Ansible inventory, and lower the recommended disk space availability\n'
'if necessary for this upgrade.').format(config_bytes)
- return {
- 'failed': True,
- 'msg': msg,
- }
+ self.register_failure(msg)
return {}
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 9c35f0f92..98372d979 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -109,8 +109,6 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
# containerized etcd may not have openshift_image_tag, see bz 1466622
image_tag = self.get_var("openshift_image_tag", default="latest")
image_info = DEPLOYMENT_IMAGE_INFO[deployment_type]
- if not image_info:
- return required
# template for images that run on top of OpenShift
image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
@@ -160,7 +158,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
deployment_type = self.get_var("openshift_deployment_type")
if deployment_type == "origin" and "docker.io" not in regs:
regs.append("docker.io")
- elif "enterprise" in deployment_type and "registry.access.redhat.com" not in regs:
+ elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs:
regs.append("registry.access.redhat.com")
return regs
diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
index 7fc843fd7..986a01f38 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
@@ -72,7 +72,7 @@ class Elasticsearch(LoggingCheck):
for pod_name in pods_by_name.keys():
# Compare what each ES node reports as master and compare for split brain
get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master")
- master_name_str = self.exec_oc(get_master_cmd, [])
+ master_name_str = self.exec_oc(get_master_cmd, [], save_as_name="get_master_names.json")
master_names = (master_name_str or '').split(' ')
if len(master_names) > 1:
es_master_names.add(master_names[1])
@@ -113,7 +113,7 @@ class Elasticsearch(LoggingCheck):
# get ES cluster nodes
node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes')
- cluster_node_data = self.exec_oc(node_cmd, [])
+ cluster_node_data = self.exec_oc(node_cmd, [], save_as_name="get_es_nodes.json")
try:
cluster_nodes = json.loads(cluster_node_data)['nodes']
except (ValueError, KeyError):
@@ -142,7 +142,7 @@ class Elasticsearch(LoggingCheck):
errors = []
for pod_name in pods_by_name.keys():
cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true')
- cluster_health_data = self.exec_oc(cluster_health_cmd, [])
+ cluster_health_data = self.exec_oc(cluster_health_cmd, [], save_as_name='get_es_health.json')
try:
health_res = json.loads(cluster_health_data)
if not health_res or not health_res.get('status'):
@@ -171,7 +171,7 @@ class Elasticsearch(LoggingCheck):
errors = []
for pod_name in pods_by_name.keys():
df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
- disk_output = self.exec_oc(df_cmd, [])
+ disk_output = self.exec_oc(df_cmd, [], save_as_name='get_pv_diskspace.json')
lines = disk_output.splitlines()
# expecting one header looking like 'IUse% Use%' and one body line
body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$'
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index ecd8adb64..06bdfebf6 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -78,7 +78,7 @@ class LoggingCheck(OpenShiftCheck):
"""Returns the namespace in which logging is configured to deploy."""
return self.get_var("openshift_logging_namespace", default="logging")
- def exec_oc(self, cmd_str="", extra_args=None):
+ def exec_oc(self, cmd_str="", extra_args=None, save_as_name=None):
"""
Execute an 'oc' command in the remote host.
Returns: output of command and namespace,
@@ -92,7 +92,7 @@ class LoggingCheck(OpenShiftCheck):
"extra_args": list(extra_args) if extra_args else [],
}
- result = self.execute_module("ocutil", args)
+ result = self.execute_module("ocutil", args, save_as_name=save_as_name)
if result.get("failed"):
if result['result'] == '[Errno 2] No such file or directory':
raise CouldNotUseOc(
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
index d781db649..cacdf4213 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
@@ -104,7 +104,7 @@ class LoggingIndexTime(LoggingCheck):
"https://logging-es:9200/project.{namespace}*/_count?q=message:{uuid}"
)
exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace(), uuid=uuid)
- result = self.exec_oc(exec_cmd, [])
+ result = self.exec_oc(exec_cmd, [], save_as_name="query_for_uuid.json")
try:
count = json.loads(result)["count"]
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index 24f1d938a..b90ebf6dd 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -49,5 +49,4 @@ class DockerHostMixin(object):
" {deps}\n{msg}"
).format(deps=',\n '.join(self.dependencies), msg=msg)
failed = result.get("failed", False) or result.get("rc", 0) != 0
- self.changed = result.get("changed", False)
return msg, failed
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index 58864da21..f14887303 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -3,10 +3,12 @@ import pytest
from ansible.playbook.play_context import PlayContext
from openshift_health_check import ActionModule, resolve_checks
-from openshift_checks import OpenShiftCheckException
+from openshift_health_check import copy_remote_file_to_dir, write_result_to_output_dir, write_to_output_file
+from openshift_checks import OpenShiftCheckException, FileToSave
-def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None, changed=False):
+def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None,
+ run_logs=None, run_files=None, changed=False, get_var_return=None):
"""Returns a new class that is compatible with OpenShiftCheck for testing."""
_name, _tags = name, tags
@@ -14,12 +16,16 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru
class FakeCheck(object):
name = _name
tags = _tags or []
- changed = False
- def __init__(self, execute_module=None, task_vars=None, tmp=None):
- pass
+ def __init__(self, **_):
+ self.changed = False
+ self.failures = []
+ self.logs = run_logs or []
+ self.files_to_save = run_files or []
def is_active(self):
+ if isinstance(is_active, Exception):
+ raise is_active
return is_active
def run(self):
@@ -28,6 +34,13 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru
raise run_exception
return run_return
+ def get_var(*args, **_):
+ return get_var_return
+
+ def register_failure(self, exc):
+ self.failures.append(OpenShiftCheckException(str(exc)))
+ return
+
return FakeCheck
@@ -98,13 +111,18 @@ def test_action_plugin_cannot_load_checks_with_the_same_name(plugin, task_vars,
assert failed(result, msg_has=['duplicate', 'duplicate_name', 'FakeCheck'])
-def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch):
- checks = [fake_check(is_active=False)]
+@pytest.mark.parametrize('is_active, skipped_reason', [
+ (False, "Not active for this host"),
+ (Exception("borked"), "exception"),
+])
+def test_action_plugin_skip_non_active_checks(is_active, skipped_reason, plugin, task_vars, monkeypatch):
+ checks = [fake_check(is_active=is_active)]
monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks))
result = plugin.run(tmp=None, task_vars=task_vars)
- assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Not active for this host")
+ assert result['checks']['fake_check'].get('skipped')
+ assert skipped_reason in result['checks']['fake_check'].get('skipped_reason')
assert not failed(result)
assert not changed(result)
assert not skipped(result)
@@ -128,10 +146,21 @@ def test_action_plugin_skip_disabled_checks(to_disable, plugin, task_vars, monke
assert not skipped(result)
+def test_action_plugin_run_list_checks(monkeypatch):
+ task = FakeTask('openshift_health_check', {'checks': []})
+ plugin = ActionModule(task, None, PlayContext(), None, None, None)
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {})
+ result = plugin.run()
+
+ assert failed(result, msg_has="Available checks")
+ assert not changed(result)
+ assert not skipped(result)
+
+
def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch):
check_return_value = {'ok': 'test'}
- check_class = fake_check(run_return=check_return_value)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
+ check_class = fake_check(run_return=check_return_value, run_files=[None])
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -145,7 +174,7 @@ def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch):
check_return_value = {'ok': 'test'}
check_class = fake_check(run_return=check_return_value, changed=True)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -158,9 +187,9 @@ def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch):
- check_return_value = {'failed': True}
+ check_return_value = {'failed': True, 'msg': 'this is a failure'}
check_class = fake_check(run_return=check_return_value)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -171,24 +200,51 @@ def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch):
assert not skipped(result)
-def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
+@pytest.mark.parametrize('exc_class, expect_traceback', [
+ (OpenShiftCheckException, False),
+ (Exception, True),
+])
+def test_action_plugin_run_check_exception(plugin, task_vars, exc_class, expect_traceback, monkeypatch):
exception_msg = 'fake check has an exception'
- run_exception = OpenShiftCheckException(exception_msg)
+ run_exception = exc_class(exception_msg)
check_class = fake_check(run_exception=run_exception, changed=True)
- monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
assert failed(result['checks']['fake_check'], msg_has=exception_msg)
+ assert expect_traceback == ("Traceback" in result['checks']['fake_check']['msg'])
assert failed(result, msg_has=['failed'])
assert changed(result['checks']['fake_check'])
assert changed(result)
assert not skipped(result)
+def test_action_plugin_run_check_output_dir(plugin, task_vars, tmpdir, monkeypatch):
+ check_class = fake_check(
+ run_return={},
+ run_logs=[('thing', 'note')],
+ run_files=[
+ FileToSave('save.file', 'contents', None),
+ FileToSave('save.file', 'duplicate', None),
+ FileToSave('copy.file', None, 'foo'), # note: copy runs execute_module => exception
+ ],
+ )
+ task_vars['openshift_checks_output_dir'] = str(tmpdir)
+ check_class.get_var = lambda self, name, **_: task_vars.get(name)
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()})
+ monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
+
+ plugin.run(tmp=None, task_vars=task_vars)
+ assert any(path.basename == task_vars['ansible_host'] for path in tmpdir.listdir())
+ assert any(path.basename == 'fake_check.log.json' for path in tmpdir.visit())
+ assert any(path.basename == 'save.file' for path in tmpdir.visit())
+ assert any(path.basename == 'save.file.2' for path in tmpdir.visit())
+
+
def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch):
- monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {})
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {})
result = plugin.run(tmp=None, task_vars=task_vars)
@@ -254,3 +310,38 @@ def test_resolve_checks_failure(names, all_checks, words_in_exception):
resolve_checks(names, all_checks)
for word in words_in_exception:
assert word in str(excinfo.value)
+
+
+@pytest.mark.parametrize('give_output_dir, result, expect_file', [
+ (False, None, False),
+ (True, dict(content="c3BhbQo=", encoding="base64"), True),
+ (True, dict(content="encoding error", encoding="base64"), False),
+ (True, dict(content="spam", no_encoding=None), True),
+ (True, dict(failed=True, msg="could not slurp"), False),
+])
+def test_copy_remote_file_to_dir(give_output_dir, result, expect_file, tmpdir):
+ check = fake_check()()
+ check.execute_module = lambda *args, **_: result
+ copy_remote_file_to_dir(check, "remote_file", str(tmpdir) if give_output_dir else "", "local_file")
+ assert expect_file == any(path.basename == "local_file" for path in tmpdir.listdir())
+
+
+def test_write_to_output_exceptions(tmpdir, monkeypatch, capsys):
+
+ class Spam(object):
+ def __str__(self):
+ raise Exception("break str")
+
+ test = {1: object(), 2: Spam()}
+ test[3] = test
+ write_result_to_output_dir(str(tmpdir), test)
+ assert "Error writing" in test["output_files"]
+
+ output_dir = tmpdir.join("eggs")
+ output_dir.write("spam") # so now it's not a dir
+ write_to_output_file(str(output_dir), "somefile", "somedata")
+ assert "Could not write" in capsys.readouterr()[1]
+
+ monkeypatch.setattr("openshift_health_check.prepare_output_dir", lambda *_: False)
+ write_result_to_output_dir(str(tmpdir), test)
+ assert "Error creating" in test["output_files"]
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index f4fd2dfed..9ae679b79 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -183,11 +183,12 @@ def test_fails_with_insufficient_disk_space(name, group_names, configured_min, a
ansible_mounts=ansible_mounts,
)
- result = DiskAvailability(fake_execute_module, task_vars).run()
+ check = DiskAvailability(fake_execute_module, task_vars)
+ check.run()
- assert result['failed']
+ assert check.failures
for chunk in 'below recommended'.split() + expect_chunks:
- assert chunk in result.get('msg', '')
+ assert chunk in str(check.failures[0])
@pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [
@@ -237,11 +238,11 @@ def test_min_required_space_changes_with_upgrade_context(name, group_names, cont
)
check = DiskAvailability(fake_execute_module, task_vars)
- result = check.run()
+ check.run()
- assert result.get("failed", False) == failed
+ assert bool(check.failures) == failed
for word in extra_words:
- assert word in result.get('msg', '')
+ assert word in str(check.failures[0])
def fake_execute_module(*args):
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 6a7c16c7e..952fa9aa6 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -23,8 +23,6 @@ def task_vars():
@pytest.mark.parametrize('deployment_type, is_containerized, group_names, expect_active', [
("origin", True, [], True),
("openshift-enterprise", True, [], True),
- ("enterprise", True, [], False),
- ("online", True, [], False),
("invalid", True, [], False),
("", True, [], False),
("origin", False, [], False),
@@ -103,6 +101,39 @@ def test_all_images_unavailable(task_vars):
assert "required Docker images are not available" in actual['msg']
+def test_no_known_registries():
+ def execute_module(module_name=None, *_):
+ if module_name == "command":
+ return {
+ 'failed': True,
+ }
+
+ return {
+ 'changed': False,
+ }
+
+ def mock_known_docker_registries():
+ return []
+
+ dia = DockerImageAvailability(execute_module, task_vars=dict(
+ openshift=dict(
+ common=dict(
+ service_type='origin',
+ is_containerized=False,
+ is_atomic=False,
+ ),
+ docker=dict(additional_registries=["docker.io"]),
+ ),
+ openshift_deployment_type="openshift-enterprise",
+ openshift_image_tag='latest',
+ group_names=['nodes', 'masters'],
+ ))
+ dia.known_docker_registries = mock_known_docker_registries
+ actual = dia.run()
+ assert actual['failed']
+ assert "Unable to retrieve any docker registries." in actual['msg']
+
+
@pytest.mark.parametrize("message,extra_words", [
(
"docker image update failure",
diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py
index 09bacd9ac..3fa5e8929 100644
--- a/roles/openshift_health_checker/test/elasticsearch_test.py
+++ b/roles/openshift_health_checker/test/elasticsearch_test.py
@@ -72,7 +72,7 @@ def test_check_elasticsearch():
assert_error_in_list('NoRunningPods', excinfo.value)
# canned oc responses to match so all the checks pass
- def exec_oc(cmd, args):
+ def exec_oc(cmd, args, **_):
if '_cat/master' in cmd:
return 'name logging-es'
elif '/_nodes' in cmd:
@@ -97,7 +97,7 @@ def test_check_running_es_pods():
def test_check_elasticsearch_masters():
pods = [plain_es_pod]
- check = canned_elasticsearch(task_vars_config_base, lambda *_: plain_es_pod['_test_master_name_str'])
+ check = canned_elasticsearch(task_vars_config_base, lambda *args, **_: plain_es_pod['_test_master_name_str'])
assert not check.check_elasticsearch_masters(pods_by_name(pods))
@@ -117,7 +117,7 @@ def test_check_elasticsearch_masters():
])
def test_check_elasticsearch_masters_error(pods, expect_error):
test_pods = list(pods)
- check = canned_elasticsearch(task_vars_config_base, lambda *_: test_pods.pop(0)['_test_master_name_str'])
+ check = canned_elasticsearch(task_vars_config_base, lambda *args, **_: test_pods.pop(0)['_test_master_name_str'])
assert_error_in_list(expect_error, check.check_elasticsearch_masters(pods_by_name(pods)))
@@ -129,7 +129,7 @@ es_node_list = {
def test_check_elasticsearch_node_list():
- check = canned_elasticsearch(task_vars_config_base, lambda *_: json.dumps(es_node_list))
+ check = canned_elasticsearch(task_vars_config_base, lambda *args, **_: json.dumps(es_node_list))
assert not check.check_elasticsearch_node_list(pods_by_name([plain_es_pod]))
@@ -151,13 +151,13 @@ def test_check_elasticsearch_node_list():
),
])
def test_check_elasticsearch_node_list_errors(pods, node_list, expect_error):
- check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list))
+ check = canned_elasticsearch(task_vars_config_base, lambda cmd, args, **_: json.dumps(node_list))
assert_error_in_list(expect_error, check.check_elasticsearch_node_list(pods_by_name(pods)))
def test_check_elasticsearch_cluster_health():
test_health_data = [{"status": "green"}]
- check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0)))
+ check = canned_elasticsearch(exec_oc=lambda *args, **_: json.dumps(test_health_data.pop(0)))
assert not check.check_es_cluster_health(pods_by_name([plain_es_pod]))
@@ -175,12 +175,12 @@ def test_check_elasticsearch_cluster_health():
])
def test_check_elasticsearch_cluster_health_errors(pods, health_data, expect_error):
test_health_data = list(health_data)
- check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0)))
+ check = canned_elasticsearch(exec_oc=lambda *args, **_: json.dumps(test_health_data.pop(0)))
assert_error_in_list(expect_error, check.check_es_cluster_health(pods_by_name(pods)))
def test_check_elasticsearch_diskspace():
- check = canned_elasticsearch(exec_oc=lambda *_: 'IUse% Use%\n 3% 4%\n')
+ check = canned_elasticsearch(exec_oc=lambda *args, **_: 'IUse% Use%\n 3% 4%\n')
assert not check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod]))
@@ -199,5 +199,5 @@ def test_check_elasticsearch_diskspace():
),
])
def test_check_elasticsearch_diskspace_errors(disk_data, expect_error):
- check = canned_elasticsearch(exec_oc=lambda *_: disk_data)
+ check = canned_elasticsearch(exec_oc=lambda *args, **_: disk_data)
assert_error_in_list(expect_error, check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod])))
diff --git a/roles/openshift_health_checker/test/logging_index_time_test.py b/roles/openshift_health_checker/test/logging_index_time_test.py
index 22566b295..c48ade9b8 100644
--- a/roles/openshift_health_checker/test/logging_index_time_test.py
+++ b/roles/openshift_health_checker/test/logging_index_time_test.py
@@ -102,7 +102,7 @@ def test_with_running_pods():
),
], ids=lambda argval: argval[0])
def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout):
- check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ check = canned_loggingindextime(lambda *args, **_: json.dumps(json_response))
check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout)
@@ -131,7 +131,7 @@ def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout):
)
], ids=lambda argval: argval[0])
def test_wait_until_cmd_or_err(name, json_response, timeout, expect_error):
- check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ check = canned_loggingindextime(lambda *args, **_: json.dumps(json_response))
with pytest.raises(OpenShiftCheckException) as error:
check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, SAMPLE_UUID, timeout)
@@ -139,7 +139,7 @@ def test_wait_until_cmd_or_err(name, json_response, timeout, expect_error):
def test_curl_kibana_with_uuid():
- check = canned_loggingindextime(lambda *_: json.dumps({"statusCode": 404}))
+ check = canned_loggingindextime(lambda *args, **_: json.dumps({"statusCode": 404}))
check.generate_uuid = lambda: SAMPLE_UUID
assert SAMPLE_UUID == check.curl_kibana_with_uuid(plain_running_kibana_pod)
@@ -161,7 +161,7 @@ def test_curl_kibana_with_uuid():
),
], ids=lambda argval: argval[0])
def test_failed_curl_kibana_with_uuid(name, json_response, expect_error):
- check = canned_loggingindextime(lambda *_: json.dumps(json_response))
+ check = canned_loggingindextime(lambda *args, **_: json.dumps(json_response))
check.generate_uuid = lambda: SAMPLE_UUID
with pytest.raises(OpenShiftCheckException) as error:
diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py
index 789784c77..bc0c3b26c 100644
--- a/roles/openshift_health_checker/test/openshift_check_test.py
+++ b/roles/openshift_health_checker/test/openshift_check_test.py
@@ -106,13 +106,40 @@ def test_get_var_convert(task_vars, keys, convert, expected):
assert dummy_check(task_vars).get_var(*keys, convert=convert) == expected
-@pytest.mark.parametrize("keys, convert", [
- (("bar", "baz"), int),
- (("bar.baz"), float),
- (("foo"), "bogus"),
- (("foo"), lambda a, b: 1),
- (("foo"), lambda a: 1 / 0),
+def convert_oscexc(_):
+ raise OpenShiftCheckException("known failure")
+
+
+def convert_exc(_):
+ raise Exception("failure unknown")
+
+
+@pytest.mark.parametrize("keys, convert, expect_text", [
+ (("bar", "baz"), int, "Cannot convert"),
+ (("bar.baz",), float, "Cannot convert"),
+ (("foo",), "bogus", "TypeError"),
+ (("foo",), lambda a, b: 1, "TypeError"),
+ (("foo",), lambda a: 1 / 0, "ZeroDivisionError"),
+ (("foo",), convert_oscexc, "known failure"),
+ (("foo",), convert_exc, "failure unknown"),
])
-def test_get_var_convert_error(task_vars, keys, convert):
- with pytest.raises(OpenShiftCheckException):
+def test_get_var_convert_error(task_vars, keys, convert, expect_text):
+ with pytest.raises(OpenShiftCheckException) as excinfo:
dummy_check(task_vars).get_var(*keys, convert=convert)
+ assert expect_text in str(excinfo.value)
+
+
+def test_register(task_vars):
+ check = dummy_check(task_vars)
+
+ check.register_failure(OpenShiftCheckException("spam"))
+ assert "spam" in str(check.failures[0])
+
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.register_file("spam") # no file contents specified
+ assert "not specified" in str(excinfo.value)
+
+ # normally execute_module registers the result file; test disabling that
+ check._execute_module = lambda *args, **_: dict()
+ check.execute_module("eggs", module_args={}, register=False)
+ assert not check.files_to_save
diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py
index e1bf29d2a..602f32989 100644
--- a/roles/openshift_health_checker/test/ovs_version_test.py
+++ b/roles/openshift_health_checker/test/ovs_version_test.py
@@ -50,7 +50,7 @@ def test_ovs_package_version(openshift_release, expected_ovs_version):
openshift_release=openshift_release,
openshift_image_tag='v' + openshift_release,
)
- return_value = object()
+ return_value = {} # note: check.execute_module modifies return hash contents
def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'rpm_version'
diff --git a/roles/openshift_health_checker/test/package_availability_test.py b/roles/openshift_health_checker/test/package_availability_test.py
index 8aa87ca59..b34e8fbfc 100644
--- a/roles/openshift_health_checker/test/package_availability_test.py
+++ b/roles/openshift_health_checker/test/package_availability_test.py
@@ -49,7 +49,7 @@ def test_is_active(pkg_mgr, is_containerized, is_active):
),
])
def test_package_availability(task_vars, must_have_packages, must_not_have_packages):
- return_value = object()
+ return_value = {}
def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'check_yum_update'
diff --git a/roles/openshift_health_checker/test/package_update_test.py b/roles/openshift_health_checker/test/package_update_test.py
index 7d9035a36..85d3c9cab 100644
--- a/roles/openshift_health_checker/test/package_update_test.py
+++ b/roles/openshift_health_checker/test/package_update_test.py
@@ -2,7 +2,7 @@ from openshift_checks.package_update import PackageUpdate
def test_package_update():
- return_value = object()
+ return_value = {}
def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'check_yum_update'
diff --git a/roles/openshift_hosted/tasks/registry/secure.yml b/roles/openshift_hosted/tasks/registry/secure.yml
index a8a6f6fc8..434b679df 100644
--- a/roles/openshift_hosted/tasks/registry/secure.yml
+++ b/roles/openshift_hosted/tasks/registry/secure.yml
@@ -1,7 +1,7 @@
---
- name: Configure facts for docker-registry
set_fact:
- openshift_hosted_registry_routecertificates: "{{ ('routecertificates' in openshift.hosted.registry.keys()) | ternary(openshift.hosted.registry.routecertificates, {}) }}"
+ openshift_hosted_registry_routecertificates: "{{ ('routecertificates' in openshift.hosted.registry.keys()) | ternary(openshift_hosted_registry_routecertificates, {}) }}"
openshift_hosted_registry_routehost: "{{ ('routehost' in openshift.hosted.registry.keys()) | ternary(openshift.hosted.registry.routehost, False) }}"
openshift_hosted_registry_routetermination: "{{ ('routetermination' in openshift.hosted.registry.keys()) | ternary(openshift.hosted.registry.routetermination, 'passthrough') }}"
diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml
index a77df9986..de5e25061 100644
--- a/roles/openshift_logging/tasks/install_logging.yaml
+++ b/roles/openshift_logging/tasks/install_logging.yaml
@@ -134,6 +134,7 @@
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}"
openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}"
openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}"
+ openshift_logging_elasticsearch_nodeselector: "{{ openshift_logging_es_ops_nodeselector }}"
openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}"
openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}"
openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}"
@@ -165,6 +166,7 @@
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}"
openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}"
openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}"
+ openshift_logging_elasticsearch_nodeselector: "{{ openshift_logging_es_ops_nodeselector }}"
openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}"
openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}"
openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}"
diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml
index 4c8d6fdad..73e935d3f 100644
--- a/roles/openshift_master/defaults/main.yml
+++ b/roles/openshift_master/defaults/main.yml
@@ -20,8 +20,8 @@ r_openshift_master_os_firewall_allow:
port: 4001/tcp
cond: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
-oreg_url: ''
-oreg_host: "{{ oreg_url.split('/')[0] if '.' in oreg_url.split('/')[0] else '' }}"
+# oreg_url is defined by user input
+oreg_host: "{{ oreg_url.split('/')[0] if (oreg_url is defined and '.' in oreg_url.split('/')[0]) else '' }}"
oreg_auth_credentials_path: "{{ r_openshift_master_data_dir }}/.docker"
oreg_auth_credentials_replace: False
l_bind_docker_reg_auth: False
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index 894fe8e2b..94b7df1fc 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -177,9 +177,33 @@
local_facts:
no_proxy_etcd_host_ips: "{{ openshift_no_proxy_etcd_host_ips }}"
+- include: registry_auth.yml
+
- name: Install the systemd units
include: systemd_units.yml
+- name: Checking for journald.conf
+ stat: path=/etc/systemd/journald.conf
+ register: journald_conf_file
+
+- name: Update journald setup
+ replace:
+ dest: /etc/systemd/journald.conf
+ regexp: '^(\#| )?{{ item.var }}=\s*.*?$'
+ replace: ' {{ item.var }}={{ item.val }}'
+ backup: yes
+ with_items: "{{ journald_vars_to_replace | default([]) }}"
+ when: journald_conf_file.stat.exists
+ register: journald_update
+
+# I need to restart journald immediatelly, otherwise it gets into way during
+# further steps in ansible
+- name: Restart journald
+ systemd:
+ name: systemd-journald
+ state: restarted
+ when: journald_update | changed
+
- name: Install Master system container
include: system_container.yml
when:
@@ -200,7 +224,7 @@
- restart master api
- set_fact:
- translated_identity_providers: "{{ openshift.master.identity_providers | translate_idps('v1', openshift.common.version, openshift.common.deployment_type) }}"
+ translated_identity_providers: "{{ openshift.master.identity_providers | translate_idps('v1') }}"
# TODO: add the validate parameter when there is a validation command to run
- name: Create master config
@@ -229,8 +253,6 @@
- restart master controllers
when: openshift_master_bootstrap_enabled | default(False)
-- include: registry_auth.yml
-
- include: set_loopback_context.yml
when:
- openshift.common.version_gte_3_2_or_1_2
diff --git a/roles/openshift_master/vars/main.yml b/roles/openshift_master/vars/main.yml
index cf39b73f6..0c681c764 100644
--- a/roles/openshift_master/vars/main.yml
+++ b/roles/openshift_master/vars/main.yml
@@ -20,3 +20,22 @@ openshift_master_valid_grant_methods:
- deny
openshift_master_is_scaleup_host: False
+
+# These defaults assume forcing journald persistence, fsync to disk once
+# a second, rate-limiting to 10,000 logs a second, no forwarding to
+# syslog or wall, using 8GB of disk space maximum, using 10MB journal
+# files, keeping only a days worth of logs per journal file, and
+# retaining journal files no longer than a month.
+journald_vars_to_replace:
+- { var: Storage, val: persistent }
+- { var: Compress, val: yes }
+- { var: SyncIntervalSec, val: 1s }
+- { var: RateLimitInterval, val: 1s }
+- { var: RateLimitBurst, val: 10000 }
+- { var: SystemMaxUse, val: 8G }
+- { var: SystemKeepFree, val: 20% }
+- { var: SystemMaxFileSize, val: 10M }
+- { var: MaxRetentionSec, val: 1month }
+- { var: MaxFileSec, val: 1day }
+- { var: ForwardToSyslog, val: no }
+- { var: ForwardToWall, val: no }
diff --git a/roles/openshift_master_facts/filter_plugins/openshift_master.py b/roles/openshift_master_facts/filter_plugins/openshift_master.py
index 56c864ec7..f7f3ac2b1 100644
--- a/roles/openshift_master_facts/filter_plugins/openshift_master.py
+++ b/roles/openshift_master_facts/filter_plugins/openshift_master.py
@@ -6,10 +6,6 @@ Custom filters for use in openshift-master
import copy
import sys
-# pylint import-error disabled because pylint cannot find the package
-# when installed in a virtualenv
-from distutils.version import LooseVersion # pylint: disable=no-name-in-module,import-error
-
from ansible import errors
from ansible.parsing.yaml.dumper import AnsibleDumper
from ansible.plugins.filter.core import to_bool as ansible_bool
@@ -82,23 +78,8 @@ class IdentityProviderBase(object):
self._allow_additional = True
@staticmethod
- def validate_idp_list(idp_list, openshift_version, deployment_type):
+ def validate_idp_list(idp_list):
''' validates a list of idps '''
- login_providers = [x.name for x in idp_list if x.login]
-
- multiple_logins_unsupported = False
- if len(login_providers) > 1:
- if deployment_type in ['enterprise', 'online', 'atomic-enterprise', 'openshift-enterprise']:
- if LooseVersion(openshift_version) < LooseVersion('3.2'):
- multiple_logins_unsupported = True
- if deployment_type in ['origin']:
- if LooseVersion(openshift_version) < LooseVersion('1.2'):
- multiple_logins_unsupported = True
- if multiple_logins_unsupported:
- raise errors.AnsibleFilterError("|failed multiple providers are "
- "not allowed for login. login "
- "providers: {0}".format(', '.join(login_providers)))
-
names = [x.name for x in idp_list]
if len(set(names)) != len(names):
raise errors.AnsibleFilterError("|failed more than one provider configured with the same name")
@@ -471,7 +452,7 @@ class FilterModule(object):
''' Custom ansible filters for use by the openshift_master role'''
@staticmethod
- def translate_idps(idps, api_version, openshift_version, deployment_type):
+ def translate_idps(idps, api_version):
''' Translates a list of dictionaries into a valid identityProviders config '''
idp_list = []
@@ -487,7 +468,7 @@ class FilterModule(object):
idp_inst.set_provider_items()
idp_list.append(idp_inst)
- IdentityProviderBase.validate_idp_list(idp_list, openshift_version, deployment_type)
+ IdentityProviderBase.validate_idp_list(idp_list)
return u(yaml.dump([idp.to_dict() for idp in idp_list],
allow_unicode=True,
default_flow_style=False,
diff --git a/roles/openshift_metrics/README.md b/roles/openshift_metrics/README.md
index 1f10de4a2..ed698daca 100644
--- a/roles/openshift_metrics/README.md
+++ b/roles/openshift_metrics/README.md
@@ -39,6 +39,8 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml).
- `openshift_metrics_hawkular_replicas:` The number of replicas for Hawkular metrics.
+- `openshift_metrics_hawkular_route_annotations`: Dictionary with annotations for the Hawkular route.
+
- `openshift_metrics_cassandra_replicas`: The number of Cassandra nodes to deploy for the
initial cluster.
diff --git a/roles/openshift_metrics/defaults/main.yaml b/roles/openshift_metrics/defaults/main.yaml
index d9a17ae7f..f45100be3 100644
--- a/roles/openshift_metrics/defaults/main.yaml
+++ b/roles/openshift_metrics/defaults/main.yaml
@@ -12,6 +12,7 @@ openshift_metrics_hawkular_cert: ""
openshift_metrics_hawkular_key: ""
openshift_metrics_hawkular_ca: ""
openshift_metrics_hawkular_nodeselector: ""
+openshift_metrics_hawkular_route_annotations: {}
openshift_metrics_cassandra_replicas: 1
openshift_metrics_cassandra_storage_type: "{{ openshift_hosted_metrics_storage_kind | default('emptydir') }}"
diff --git a/roles/openshift_metrics/tasks/install_hawkular.yaml b/roles/openshift_metrics/tasks/install_hawkular.yaml
index 6b37f85ab..b63f5ca8c 100644
--- a/roles/openshift_metrics/tasks/install_hawkular.yaml
+++ b/roles/openshift_metrics/tasks/install_hawkular.yaml
@@ -40,6 +40,7 @@
dest: "{{ mktemp.stdout }}/templates/hawkular-metrics-route.yaml"
vars:
name: hawkular-metrics
+ annotations: "{{ openshift_metrics_hawkular_route_annotations }}"
labels:
metrics-infra: hawkular-metrics
host: "{{ openshift_metrics_hawkular_hostname }}"
diff --git a/roles/openshift_metrics/templates/route.j2 b/roles/openshift_metrics/templates/route.j2
index 423ab54a3..253d6ecf5 100644
--- a/roles/openshift_metrics/templates/route.j2
+++ b/roles/openshift_metrics/templates/route.j2
@@ -2,6 +2,9 @@ apiVersion: v1
kind: Route
metadata:
name: {{ name }}
+{% if annotations is defined %}
+ annotations: {{ annotations | to_yaml }}
+{% endif %}
{% if labels is defined and labels %}
labels:
{% for k, v in labels.iteritems() %}
diff --git a/roles/openshift_node/defaults/main.yml b/roles/openshift_node/defaults/main.yml
index 433e92201..ed3516d04 100644
--- a/roles/openshift_node/defaults/main.yml
+++ b/roles/openshift_node/defaults/main.yml
@@ -82,8 +82,8 @@ default_r_openshift_node_os_firewall_allow:
# Allow multiple port ranges to be added to the role
r_openshift_node_os_firewall_allow: "{{ default_r_openshift_node_os_firewall_allow | union(openshift_node_open_ports | default([])) }}"
-oreg_url: ''
-oreg_host: "{{ oreg_url.split('/')[0] if '.' in oreg_url.split('/')[0] else '' }}"
+# oreg_url is defined by user input
+oreg_host: "{{ oreg_url.split('/')[0] if (oreg_url is defined and '.' in oreg_url.split('/')[0]) else '' }}"
oreg_auth_credentials_path: "{{ openshift_node_data_dir }}/.docker"
oreg_auth_credentials_replace: False
l_bind_docker_reg_auth: False
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index 28238a8fa..e82fb42b8 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -2,7 +2,8 @@
- fail:
msg: "SELinux is disabled, This deployment type requires that SELinux is enabled."
when:
- - (not ansible_selinux or ansible_selinux.status != 'enabled') and deployment_type in ['enterprise', 'online', 'atomic-enterprise', 'openshift-enterprise']
+ - (not ansible_selinux or ansible_selinux.status != 'enabled')
+ - deployment_type == 'openshift-enterprise'
- not openshift_use_crio | default(false)
- name: setup firewall
@@ -69,12 +70,12 @@
include: bootstrap.yml
when: openshift_node_bootstrap
+- include: registry_auth.yml
+
- name: include standard node config
include: config.yml
when: not openshift_node_bootstrap
-- include: registry_auth.yml
-
- name: Configure AWS Cloud Provider Settings
lineinfile:
dest: /etc/sysconfig/{{ openshift.common.service_type }}-node
diff --git a/roles/openshift_node/tasks/node_system_container.yml b/roles/openshift_node/tasks/node_system_container.yml
index b2dceedbe..0ca44c292 100644
--- a/roles/openshift_node/tasks/node_system_container.yml
+++ b/roles/openshift_node/tasks/node_system_container.yml
@@ -9,4 +9,6 @@
oc_atomic_container:
name: "{{ openshift.common.service_type }}-node"
image: "{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{{ openshift.node.node_system_image }}:{{ openshift_image_tag }}"
+ values:
+ - "DNS_DOMAIN={{ openshift.common.dns_domain }}"
state: latest
diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml
index 18d6a1645..5aa8aecec 100644
--- a/roles/openshift_prometheus/defaults/main.yaml
+++ b/roles/openshift_prometheus/defaults/main.yaml
@@ -11,7 +11,7 @@ openshift_prometheus_node_selector: {"region":"infra"}
openshift_prometheus_image_proxy: "openshift/oauth-proxy:v1.0.0"
openshift_prometheus_image_prometheus: "openshift/prometheus:v2.0.0-dev"
openshift_prometheus_image_alertmanager: "openshift/prometheus-alertmanager:dev"
-openshift_prometheus_image_alertbuffer: "ilackarms/message-buffer"
+openshift_prometheus_image_alertbuffer: "openshift/prometheus-alert-buffer:v0.0.1"
# additional prometheus rules file
openshift_prometheus_additional_rules_file: null
diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml
index 93bdda3e8..a9bce2fb1 100644
--- a/roles/openshift_prometheus/tasks/install_prometheus.yaml
+++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml
@@ -107,7 +107,10 @@
- name: annotate prometheus service
command: >
{{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
- service prometheus 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls'
+ service prometheus
+ prometheus.io/scrape='true'
+ prometheus.io/scheme=https
+ service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls
- name: annotate alerts service
command: >
diff --git a/roles/openshift_repos/README.md b/roles/openshift_repos/README.md
index abd1997dd..ce3b51454 100644
--- a/roles/openshift_repos/README.md
+++ b/roles/openshift_repos/README.md
@@ -1,4 +1,4 @@
-OpenShift Repos
+OpenShift Repos
================
Configures repositories for an OpenShift installation
@@ -12,10 +12,10 @@ rhel-7-server-extra-rpms, and rhel-7-server-ose-3.0-rpms repos.
Role Variables
--------------
-| Name | Default value | |
-|-------------------------------|---------------|------------------------------------|
-| openshift_deployment_type | None | Possible values enterprise, origin |
-| openshift_additional_repos | {} | TODO |
+| Name | Default value | |
+|-------------------------------|---------------|----------------------------------------------|
+| openshift_deployment_type | None | Possible values openshift-enterprise, origin |
+| openshift_additional_repos | {} | TODO |
Dependencies
------------
diff --git a/roles/openshift_sanitize_inventory/vars/main.yml b/roles/openshift_sanitize_inventory/vars/main.yml
index da48e42c1..37e88758d 100644
--- a/roles/openshift_sanitize_inventory/vars/main.yml
+++ b/roles/openshift_sanitize_inventory/vars/main.yml
@@ -1,7 +1,4 @@
---
# origin uses community packages named 'origin'
-# online currently uses 'openshift' packages
-# enterprise is used for OSE 3.0 < 3.1 which uses packages named 'openshift'
-# atomic-enterprise uses Red Hat packages named 'atomic-openshift'
-# openshift-enterprise uses Red Hat packages named 'atomic-openshift' starting with OSE 3.1
-known_openshift_deployment_types: ['origin', 'online', 'enterprise', 'atomic-enterprise', 'openshift-enterprise']
+# openshift-enterprise uses Red Hat packages named 'atomic-openshift'
+known_openshift_deployment_types: ['origin', 'openshift-enterprise']
diff --git a/roles/openshift_service_catalog/tasks/install.yml b/roles/openshift_service_catalog/tasks/install.yml
index 746c73eaf..d134867bd 100644
--- a/roles/openshift_service_catalog/tasks/install.yml
+++ b/roles/openshift_service_catalog/tasks/install.yml
@@ -6,8 +6,6 @@
register: mktemp
changed_when: False
-- include: wire_aggregator.yml
-
- name: Set default image variables based on deployment_type
include_vars: "{{ item }}"
with_first_found:
diff --git a/roles/openshift_service_catalog/tasks/wire_aggregator.yml b/roles/openshift_service_catalog/tasks/wire_aggregator.yml
index 1c788470a..6431c6d3f 100644
--- a/roles/openshift_service_catalog/tasks/wire_aggregator.yml
+++ b/roles/openshift_service_catalog/tasks/wire_aggregator.yml
@@ -18,11 +18,10 @@
changed_when: false
delegate_to: "{{ first_master }}"
-
# TODO: this currently has a bug where hostnames are required
- name: Creating First Master Aggregator signer certs
command: >
- oc adm ca create-signer-cert
+ {{ hostvars[first_master].openshift.common.client_binary }} adm ca create-signer-cert
--cert=/etc/origin/master/front-proxy-ca.crt
--key=/etc/origin/master/front-proxy-ca.key
--serial=/etc/origin/master/ca.serial.txt
@@ -79,7 +78,7 @@
- name: Create first master api-client config for Aggregator
command: >
- oc adm create-api-client-config
+ {{ hostvars[first_master].openshift.common.client_binary }} adm create-api-client-config
--certificate-authority=/etc/origin/master/front-proxy-ca.crt
--signer-cert=/etc/origin/master/front-proxy-ca.crt
--signer-key=/etc/origin/master/front-proxy-ca.key
diff --git a/roles/openshift_version/defaults/main.yml b/roles/openshift_version/defaults/main.yml
index 01a1a7472..53d10f1f8 100644
--- a/roles/openshift_version/defaults/main.yml
+++ b/roles/openshift_version/defaults/main.yml
@@ -1,2 +1,3 @@
---
openshift_protect_installed_version: True
+version_install_base_package: False
diff --git a/roles/openshift_version/tasks/main.yml b/roles/openshift_version/tasks/main.yml
index 204abe27e..1ff99adf8 100644
--- a/roles/openshift_version/tasks/main.yml
+++ b/roles/openshift_version/tasks/main.yml
@@ -5,11 +5,15 @@
is_containerized: "{{ openshift.common.is_containerized | default(False) | bool }}"
is_atomic: "{{ openshift.common.is_atomic | default(False) | bool }}"
+# This is only needed on masters and nodes; version_install_base_package
+# should be set by a play externally.
- name: Install the base package for versioning
package:
name: "{{ openshift.common.service_type }}{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) }}"
state: present
- when: not is_containerized | bool
+ when:
+ - not is_containerized | bool
+ - version_install_base_package | bool
# Block attempts to install origin without specifying some kind of version information.
# This is because the latest tags for origin are usually alpha builds, which should not
diff --git a/roles/rhel_subscribe/tasks/enterprise.yml b/roles/rhel_subscribe/tasks/enterprise.yml
index 39d59db70..9738929d2 100644
--- a/roles/rhel_subscribe/tasks/enterprise.yml
+++ b/roles/rhel_subscribe/tasks/enterprise.yml
@@ -3,20 +3,17 @@
command: subscription-manager repos --disable="*"
- set_fact:
- default_ose_version: '3.0'
- when: deployment_type == 'enterprise'
-
-- set_fact:
default_ose_version: '3.6'
- when: deployment_type in ['atomic-enterprise', 'openshift-enterprise']
+ when: deployment_type == 'openshift-enterprise'
- set_fact:
ose_version: "{{ lookup('oo_option', 'ose_version') | default(default_ose_version, True) }}"
- fail:
msg: "{{ ose_version }} is not a valid version for {{ deployment_type }} deployment type"
- when: ( deployment_type == 'enterprise' and ose_version not in ['3.0'] ) or
- ( deployment_type in ['atomic-enterprise', 'openshift-enterprise'] and ose_version not in ['3.1', '3.2', '3.3', '3.4', '3.5', '3.6'] )
+ when:
+ - deployment_type == 'openshift-enterprise'
+ - ose_version not in ['3.1', '3.2', '3.3', '3.4', '3.5', '3.6'] )
- name: Enable RHEL repositories
command: subscription-manager repos \
diff --git a/roles/rhel_subscribe/tasks/main.yml b/roles/rhel_subscribe/tasks/main.yml
index 453044a6e..c43e5513d 100644
--- a/roles/rhel_subscribe/tasks/main.yml
+++ b/roles/rhel_subscribe/tasks/main.yml
@@ -41,15 +41,19 @@
redhat_subscription:
username: "{{ rhel_subscription_user }}"
password: "{{ rhel_subscription_pass }}"
+ register: rh_subscription
+ until: rh_subscription | succeeded
- name: Retrieve the OpenShift Pool ID
command: subscription-manager list --available --matches="{{ rhel_subscription_pool }}" --pool-only
register: openshift_pool_id
+ until: openshift_pool_id | succeeded
changed_when: False
- name: Determine if OpenShift Pool Already Attached
command: subscription-manager list --consumed --matches="{{ rhel_subscription_pool }}" --pool-only
register: openshift_pool_attached
+ until: openshift_pool_attached | succeeded
changed_when: False
when: openshift_pool_id.stdout == ''
@@ -58,10 +62,12 @@
when: openshift_pool_id.stdout == '' and openshift_pool_attached is defined and openshift_pool_attached.stdout == ''
- name: Attach to OpenShift Pool
- command: subscription-manager subscribe --pool {{ openshift_pool_id.stdout_lines[0] }}
+ command: subscription-manager attach --pool {{ openshift_pool_id.stdout_lines[0] }}
+ register: subscribe_pool
+ until: subscribe_pool | succeeded
when: openshift_pool_id.stdout != ''
- include: enterprise.yml
when:
- - deployment_type in [ 'enterprise', 'atomic-enterprise', 'openshift-enterprise' ]
+ - deployment_type == 'openshift-enterprise'
- not ostree_booted.stat.exists | bool