diff options
Diffstat (limited to 'playbooks/openshift-checks')
18 files changed, 351 insertions, 0 deletions
diff --git a/playbooks/openshift-checks/README.md b/playbooks/openshift-checks/README.md new file mode 100644 index 000000000..0b7ea91ff --- /dev/null +++ b/playbooks/openshift-checks/README.md @@ -0,0 +1,104 @@ +# OpenShift health checks + +This directory contains Ansible playbooks for detecting potential problems prior +to an install, as well as health checks to run on existing OpenShift clusters. + +Ansible's default operation mode is to fail fast, on the first error. However, +when performing checks, it is useful to gather as much information about +problems as possible in a single run. + +Thus, the playbooks run a battery of checks against the inventory hosts and +gather intermediate errors, giving a more complete diagnostic of the state of +each host. If any check failed, the playbook run will be marked as failed. + +To facilitate understanding the problems that were encountered, a custom +callback plugin summarizes execution errors at the end of a playbook run. + +## Available playbooks + +1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system + requirements and look for common problems that can prevent a successful + installation of a production cluster. + +2. Diagnostic playbook ([health.yml](health.yml)) - check an existing cluster + for known signs of problems. + +3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - + check that certificates in use are valid and not expiring soon. + +4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks or to + list existing checks. + See the [next section](#the-adhoc-playbook) for a usage example. + +## Running + +With a [recent installation of Ansible](../../../README.md#setup), run the playbook +against your inventory file. Here is the step-by-step: + +1. If you haven't done it yet, clone this repository: + + ```console + $ git clone https://github.com/openshift/openshift-ansible + $ cd openshift-ansible + ``` + +2. Install the [dependencies](../../../README.md#setup) + +3. Run the appropriate playbook: + + ```console + $ ansible-playbook -i <inventory file> playbooks/openshift-checks/pre-install.yml + ``` + + or + + ```console + $ ansible-playbook -i <inventory file> playbooks/openshift-checks/health.yml + ``` + + or + + ```console + $ ansible-playbook -i <inventory file> playbooks/openshift-checks/certificate_expiry/default.yaml -v + ``` + +### The adhoc playbook + +The adhoc playbook gives flexibility to run any check or a custom group of +checks. What will be run is determined by the `openshift_checks` variable, +which, among other ways supported by Ansible, can be set on the command line +using the `-e` flag. + +For example, to run the `docker_storage` check: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=docker_storage +``` + +To run more checks, use a comma-separated list of check names: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=docker_storage,disk_availability +``` + +To run an entire class of checks, use the name of a check group tag, prefixed by `@`. This will run all checks tagged `preflight`: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=@preflight +``` + +It is valid to specify multiple check tags and individual check names together +in a comma-separated list. + +To list all of the available checks and tags, run the adhoc playbook without +setting the `openshift_checks` variable: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml +``` + +## Running in a container + +This repository is built into a Docker image including Ansible so that it can +be run anywhere Docker is available, without the need to manually install dependencies. +Instructions for doing so may be found [in the README](../../../README_CONTAINER_IMAGE.md). diff --git a/playbooks/openshift-checks/adhoc.yml b/playbooks/openshift-checks/adhoc.yml new file mode 100644 index 000000000..414090733 --- /dev/null +++ b/playbooks/openshift-checks/adhoc.yml @@ -0,0 +1,25 @@ +--- +# NOTE: ideally this would be just part of a single play in +# private/adhoc.yml that lists the existing checks when +# openshift_checks is not set or run the requested checks. However, to actually +# run the checks we need to have the included dependencies to run first and that +# takes time. To speed up listing checks, we use this separate play that runs +# before the include of dependencies to save time and improve the UX. +- name: OpenShift health checks + # NOTE: though the openshift_checks variable could be potentially defined on + # individual hosts while not defined for localhost, we do not support that + # usage. Running this play only in localhost speeds up execution. + hosts: localhost + connection: local + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: adhoc + pre_tasks: + - name: List known health checks + action: openshift_health_check + when: openshift_checks is undefined or not openshift_checks + +- import_playbook: ../init/main.yml + +- import_playbook: private/adhoc.yml diff --git a/playbooks/openshift-checks/certificate_expiry/default.yaml b/playbooks/openshift-checks/certificate_expiry/default.yaml new file mode 100644 index 000000000..630135cae --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/default.yaml @@ -0,0 +1,10 @@ +--- +# Default behavior, you will need to ensure you run ansible with the +# -v option to see report results: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml b/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml new file mode 100644 index 000000000..378d1f154 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml @@ -0,0 +1,40 @@ +# This example generates HTML and JSON reports and +# +# Copies of the generated HTML and JSON reports are uploaded to the masters, +# which is particularly useful when this playbook is run from a container. +# +# All certificates (healthy or not) are included in the results +# +# Optional environment variables to alter the behaviour of the playbook: +# CERT_EXPIRY_WARN_DAYS: Length of the warning window in days (45) +# COPY_TO_PATH: path to copy reports to in the masters (/etc/origin/certificate_expiration_report) +--- +- name: Generate certificate expiration reports + hosts: nodes:masters:etcd + gather_facts: no + vars: + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_show_all: yes + openshift_certificate_expiry_warning_days: "{{ lookup('env', 'CERT_EXPIRY_WARN_DAYS') | default('45', true) }}" + roles: + - role: openshift_certificate_expiry + +- name: Upload reports to master + hosts: masters + gather_facts: no + vars: + destination_path: "{{ lookup('env', 'COPY_TO_PATH') | default('/etc/origin/certificate_expiration_report', true) }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}" + tasks: + - name: Ensure that the target directory exists + file: + path: "{{ destination_path }}" + state: directory + - name: Copy the reports + copy: + dest: "{{ destination_path }}/{{ timestamp }}-{{ item }}" + src: "/tmp/{{ item }}" + with_items: + - "cert-expiry-report.html" + - "cert-expiry-report.json" diff --git a/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml b/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml new file mode 100644 index 000000000..ae41c7c14 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml @@ -0,0 +1,18 @@ +--- +# This example playbook is great if you're just wanting to try the +# role out. +# +# This example enables HTML and JSON reports +# +# All certificates (healthy or not) are included in the results + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_show_all: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml b/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml new file mode 100644 index 000000000..d80cb6ff4 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml @@ -0,0 +1,12 @@ +--- +# Generate HTML and JSON artifacts in their default paths: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml b/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml new file mode 100644 index 000000000..2189455b7 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml @@ -0,0 +1,16 @@ +--- +# Generate timestamped HTML and JSON reports in /var/lib/certcheck + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_show_all: yes + timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}" + openshift_certificate_expiry_html_report_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.html" + openshift_certificate_expiry_json_results_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.json" + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml b/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml new file mode 100644 index 000000000..87a0f3be4 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml @@ -0,0 +1,13 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out) and save the results as a JSON file: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml b/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml new file mode 100644 index 000000000..960457c4b --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml @@ -0,0 +1,12 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out): + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/roles b/playbooks/openshift-checks/certificate_expiry/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/openshift-checks/health.yml b/playbooks/openshift-checks/health.yml new file mode 100644 index 000000000..caac06626 --- /dev/null +++ b/playbooks/openshift-checks/health.yml @@ -0,0 +1,4 @@ +--- +- import_playbook: ../init/main.yml + +- import_playbook: private/health.yml diff --git a/playbooks/openshift-checks/pre-install.yml b/playbooks/openshift-checks/pre-install.yml new file mode 100644 index 000000000..4511f6e3c --- /dev/null +++ b/playbooks/openshift-checks/pre-install.yml @@ -0,0 +1,4 @@ +--- +- import_playbook: ../init/main.yml + +- import_playbook: private/pre-install.yml diff --git a/playbooks/openshift-checks/private/adhoc.yml b/playbooks/openshift-checks/private/adhoc.yml new file mode 100644 index 000000000..d0deaeb65 --- /dev/null +++ b/playbooks/openshift-checks/private/adhoc.yml @@ -0,0 +1,13 @@ +--- +- name: OpenShift Health Checks + hosts: oo_all_hosts + + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: adhoc + post_tasks: + - name: Run health checks (adhoc) + action: openshift_health_check + args: + checks: '{{ openshift_checks | default([]) }}' diff --git a/playbooks/openshift-checks/private/health.yml b/playbooks/openshift-checks/private/health.yml new file mode 100644 index 000000000..d0921b9d3 --- /dev/null +++ b/playbooks/openshift-checks/private/health.yml @@ -0,0 +1,13 @@ +--- +- name: OpenShift Health Checks + hosts: oo_all_hosts + + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: health + post_tasks: + - name: Run health checks (@health) + action: openshift_health_check + args: + checks: ['@health'] diff --git a/playbooks/openshift-checks/private/install.yml b/playbooks/openshift-checks/private/install.yml new file mode 100644 index 000000000..93cf6c359 --- /dev/null +++ b/playbooks/openshift-checks/private/install.yml @@ -0,0 +1,51 @@ +--- +- name: Health Check Checkpoint Start + hosts: all + gather_facts: false + tasks: + - name: Set Health Check 'In Progress' + run_once: true + set_stats: + data: + installer_phase_health: + status: "In Progress" + start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" + +- name: OpenShift Health Checks + hosts: oo_all_hosts + any_errors_fatal: true + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: install + post_tasks: + - name: Run health checks (install) - EL + when: ansible_distribution != "Fedora" + action: openshift_health_check + args: + checks: + - disk_availability + - memory_availability + - package_availability + - package_version + - docker_image_availability + - docker_storage + + - name: Run health checks (install) - Fedora + when: ansible_distribution == "Fedora" + action: openshift_health_check + args: + checks: + - docker_image_availability + +- name: Health Check Checkpoint End + hosts: all + gather_facts: false + tasks: + - name: Set Health Check 'Complete' + run_once: true + set_stats: + data: + installer_phase_health: + status: "Complete" + end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" diff --git a/playbooks/openshift-checks/private/pre-install.yml b/playbooks/openshift-checks/private/pre-install.yml new file mode 100644 index 000000000..32449d4e4 --- /dev/null +++ b/playbooks/openshift-checks/private/pre-install.yml @@ -0,0 +1,13 @@ +--- +- name: OpenShift Health Checks + hosts: oo_all_hosts + + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: pre-install + post_tasks: + - name: Run health checks (@preflight) + action: openshift_health_check + args: + checks: ['@preflight'] diff --git a/playbooks/openshift-checks/private/roles b/playbooks/openshift-checks/private/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/openshift-checks/private/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/openshift-checks/roles b/playbooks/openshift-checks/roles new file mode 120000 index 000000000..b741aa3db --- /dev/null +++ b/playbooks/openshift-checks/roles @@ -0,0 +1 @@ +../../roles
\ No newline at end of file |