From ebf6ed8d3d467a95fe62c22f131c82882a34c3ad Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Thu, 3 Aug 2017 15:35:15 +0200 Subject: Update health check README --- playbooks/byo/openshift-checks/README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'playbooks/byo/openshift-checks') diff --git a/playbooks/byo/openshift-checks/README.md b/playbooks/byo/openshift-checks/README.md index f0f14b268..5bb1b2fbf 100644 --- a/playbooks/byo/openshift-checks/README.md +++ b/playbooks/byo/openshift-checks/README.md @@ -7,15 +7,14 @@ Ansible's default operation mode is to fail fast, on the first error. However, when performing checks, it is useful to gather as much information about problems as possible in a single run. -Thus, the playbooks run a battery of checks against the inventory hosts and have -Ansible gather intermediate errors, giving a more complete diagnostic of the -state of each host. If any check failed, the playbook run will be marked as -failed. +Thus, the playbooks run a battery of checks against the inventory hosts and +gather intermediate errors, giving a more complete diagnostic of the state of +each host. If any check failed, the playbook run will be marked as failed. To facilitate understanding the problems that were encountered, a custom callback plugin summarizes execution errors at the end of a playbook run. -# Available playbooks +## Available playbooks 1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system requirements and look for common problems that can prevent a successful -- cgit v1.2.3 From f98c978bd49f2473ce271d4fc69be7e4eea78125 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Mon, 12 Jun 2017 11:46:48 +0200 Subject: Add playbook for running arbitrary health checks This is useful on its own, and also aids in developing/testing new checks that are not part of any playbook. 
Since the intent when running this playbook is to execute checks, opt for a less verbose explanation on the error summary. --- playbooks/byo/openshift-checks/README.md | 31 ++++++++++++++++++++++ playbooks/byo/openshift-checks/adhoc.yml | 6 +++++ playbooks/common/openshift-checks/adhoc.yml | 12 +++++++++ .../callback_plugins/zz_failure_summary.py | 2 +- 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 playbooks/byo/openshift-checks/adhoc.yml create mode 100644 playbooks/common/openshift-checks/adhoc.yml (limited to 'playbooks/byo/openshift-checks') diff --git a/playbooks/byo/openshift-checks/README.md b/playbooks/byo/openshift-checks/README.md index 5bb1b2fbf..c17f5b6b7 100644 --- a/playbooks/byo/openshift-checks/README.md +++ b/playbooks/byo/openshift-checks/README.md @@ -26,6 +26,9 @@ callback plugin summarizes execution errors at the end of a playbook run. 3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - check that certificates in use are valid and not expiring soon. +4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks. + See the [next section](#the-adhoc-playbook) for a usage example. + ## Running With a [recent installation of Ansible](../../../README.md#setup), run the playbook @@ -58,6 +61,34 @@ against your inventory file. Here is the step-by-step: $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/certificate_expiry/default.yaml -v ``` +### The adhoc playbook + +The adhoc playbook gives flexibility to run any check or a custom group of +checks. What will be run is determined by the `openshift_checks` variable, +which, among other ways supported by Ansible, can be set on the command line +using the `-e` flag.
+ +For example, to run the `docker_storage` check: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml -e openshift_checks=docker_storage +``` + +To run more checks, use a comma-separated list of check names: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml -e openshift_checks=docker_storage,disk_availability +``` + +To run an entire class of checks, use the name of a check group tag, prefixed by `@`. This will run all checks tagged `preflight`: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml -e openshift_checks=@preflight +``` + +It is valid to specify multiple check tags and individual check names together +in a comma-separated list. + ## Running in a container This repository is built into a Docker image including Ansible so that it can diff --git a/playbooks/byo/openshift-checks/adhoc.yml b/playbooks/byo/openshift-checks/adhoc.yml new file mode 100644 index 000000000..2ece40e96 --- /dev/null +++ b/playbooks/byo/openshift-checks/adhoc.yml @@ -0,0 +1,6 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml + +- include: ../../common/openshift-cluster/std_include.yml + +- include: ../../common/openshift-checks/adhoc.yml diff --git a/playbooks/common/openshift-checks/adhoc.yml b/playbooks/common/openshift-checks/adhoc.yml new file mode 100644 index 000000000..dfcef8435 --- /dev/null +++ b/playbooks/common/openshift-checks/adhoc.yml @@ -0,0 +1,12 @@ +--- +- name: OpenShift health checks + hosts: oo_all_hosts + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: adhoc + post_tasks: + - name: Run health checks + action: openshift_health_check + args: + checks: '{{ openshift_checks | default([]) }}' diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index d10200719..9f9fe123a 100644 ---
a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -101,7 +101,7 @@ class CallbackModule(CallbackBase): 'Variables can be set in the inventory or passed on the\n' 'command line using the -e flag to ansible-playbook.\n\n' ).format(playbook=self._playbook_file, checks=checks) - if context in ['pre-install', 'health']: + if context in ['pre-install', 'health', 'adhoc']: summary = ( # user was expecting to run checks, less explanation needed '\n' 'You may choose to configure or disable failing checks by\n' -- cgit v1.2.3 From 25276bda8c002f4279e5c1748f64a9fd1ee999a4 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Fri, 23 Jun 2017 15:31:12 +0200 Subject: List existing health checks when none is requested This is a simple mechanism to learn what health checks are available. Note that we defer task_vars verification, so that we can compute requested_checks and resolved_checks earlier, allowing us to list checks even if openshift_facts has not run. --- playbooks/byo/openshift-checks/README.md | 10 ++++- playbooks/byo/openshift-checks/adhoc.yml | 21 ++++++++++ .../action_plugins/openshift_health_check.py | 48 ++++++++++++++++++---- .../test/action_plugin_test.py | 3 +- 4 files changed, 73 insertions(+), 9 deletions(-) (limited to 'playbooks/byo/openshift-checks') diff --git a/playbooks/byo/openshift-checks/README.md b/playbooks/byo/openshift-checks/README.md index c17f5b6b7..b26e7d7ed 100644 --- a/playbooks/byo/openshift-checks/README.md +++ b/playbooks/byo/openshift-checks/README.md @@ -26,7 +26,8 @@ callback plugin summarizes execution errors at the end of a playbook run. 3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - check that certificates in use are valid and not expiring soon. -4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks. +4. 
Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks or to + list existing checks. See the [next section](#the-adhoc-playbook) for a usage example. ## Running @@ -89,6 +90,13 @@ $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml It is valid to specify multiple check tags and individual check names together in a comma-separated list. +To list all of the available checks and tags, run the adhoc playbook without +setting the `openshift_checks` variable: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml +``` + ## Running in a container This repository is built into a Docker image including Ansible so that it can diff --git a/playbooks/byo/openshift-checks/adhoc.yml b/playbooks/byo/openshift-checks/adhoc.yml index 2ece40e96..226bed732 100644 --- a/playbooks/byo/openshift-checks/adhoc.yml +++ b/playbooks/byo/openshift-checks/adhoc.yml @@ -1,4 +1,25 @@ --- +# NOTE: ideally this would be just part of a single play in +# common/openshift-checks/adhoc.yml that lists the existing checks when +# openshift_checks is not set or run the requested checks. However, to actually +# run the checks we need to have the included dependencies to run first and that +# takes time. To speed up listing checks, we use this separate play that runs +# before the include of dependencies to save time and improve the UX. +- name: OpenShift health checks + # NOTE: though the openshift_checks variable could be potentially defined on + # individual hosts while not defined for localhost, we do not support that + # usage. Running this play only in localhost speeds up execution.
+ hosts: localhost + connection: local + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: adhoc + pre_tasks: + - name: List known health checks + action: openshift_health_check + when: openshift_checks is undefined or not openshift_checks + - include: ../openshift-cluster/initialize_groups.yml - include: ../../common/openshift-cluster/std_include.yml diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index 05e53333d..898d158a4 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -28,25 +28,32 @@ class ActionModule(ActionBase): result = super(ActionModule, self).run(tmp, task_vars) task_vars = task_vars or {} - # vars are not supportably available in the callback plugin, - # so record any it will need in the result. + # callback plugins cannot read Ansible vars, but we would like + # zz_failure_summary to have access to certain values. We do so by + # storing the information we need in the result. result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context') - if "openshift" not in task_vars: - result["failed"] = True - result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?" - return result - try: known_checks = self.load_known_checks(tmp, task_vars) args = self._task.args requested_checks = normalize(args.get('checks', [])) + + if not requested_checks: + result['failed'] = True + result['msg'] = list_known_checks(known_checks) + return result + resolved_checks = resolve_checks(requested_checks, known_checks.values()) except OpenShiftCheckException as e: result["failed"] = True result["msg"] = str(e) return result + if "openshift" not in task_vars: + result["failed"] = True + result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?" 
+ return result + result["checks"] = check_results = {} user_disabled_checks = normalize(task_vars.get('openshift_disable_check', [])) @@ -96,6 +103,33 @@ class ActionModule(ActionBase): return known_checks +def list_known_checks(known_checks): + """Return text listing the existing checks and tags.""" + # TODO: we could include a description of each check by taking it from a + # check class attribute (e.g., __doc__) when building the message below. + msg = ( + 'This playbook is meant to run health checks, but no checks were ' + 'requested. Set the `openshift_checks` variable to a comma-separated ' + 'list of check names or a YAML list. Available checks:\n {}' + ).format('\n '.join(sorted(known_checks))) + + tag_checks = defaultdict(list) + for cls in known_checks.values(): + for tag in cls.tags: + tag_checks[tag].append(cls.name) + tags = [ + '@{} = {}'.format(tag, ','.join(sorted(checks))) + for tag, checks in tag_checks.items() + ] + + msg += ( + '\n\nTags can be used as a shortcut to select multiple ' + 'checks. Available tags and the checks they select:\n {}' + ).format('\n '.join(sorted(tags))) + + return msg + + def resolve_checks(names, all_checks): """Returns a set of resolved check names. diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index f5161d6f5..f73eb4f6e 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -80,7 +80,8 @@ def skipped(result): None, {}, ]) -def test_action_plugin_missing_openshift_facts(plugin, task_vars): +def test_action_plugin_missing_openshift_facts(plugin, task_vars, monkeypatch): + monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) assert failed(result, msg_has=['openshift_facts']) -- cgit v1.2.3