diff options
Diffstat (limited to 'roles/os_zabbix')
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 25 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_node.yml | 31 |
2 files changed, 30 insertions, 26 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 1824d7881..e36f23a2b 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -7,12 +7,6 @@ g_template_openshift_master: - Openshift Master key: openshift.master.app.create - - key: openshift.master.registry.healthy_pct - description: "Shows the percentage of healthy registries in the cluster" - type: int - applications: - - Openshift Master - - key: openshift.master.process.count description: Shows number of master processes running type: int @@ -278,11 +272,6 @@ g_template_openshift_master: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high - - name: 'Low number of etcd watchers on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' - priority: avg - - name: 'Etcd ping failed on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' @@ -345,20 +334,6 @@ g_template_openshift_master: - 'Openshift Master process not running on {HOST.NAME}' priority: avg - - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.registry.healthy_pct.last(#2)}<100 and {Template Openshift Master:openshift.master.registry.healthy_pct.max(#2)}>50' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc' - dependencies: - - 'Openshift Master process not running on {HOST.NAME}' - priority: avg - - - name: 'Multiple Docker Registries are unhealthy according to {HOST.NAME}' - expression: '{Template Openshift Master:openshift.master.registry.healthy_pct.last(#2)}<51' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc' - dependencies: - - 'Openshift Master process not running on {HOST.NAME}' - priority: high - - name: 'SkyDNS port not listening on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.skydns.port.open.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml index c36c593df..e6daee8e4 100644 --- a/roles/os_zabbix/vars/template_openshift_node.yml +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -69,4 +69,33 @@ g_template_openshift_node: url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' priority: high - + zactions: + - name: '[HEAL] OVS may not be running on {HOST.NAME}' + status: disabled + escalation_time: 60 + conditions_filter: + calculation_type: "and/or" + conditions: + - conditiontype: maintenance status + operator: not in + - conditiontype: trigger name + operator: like + value: "[HEAL] OVS may not be running on" + - conditiontype: trigger value + operator: "=" + value: PROBLEM + operations: + - esc_step_from: 1 + esc_step_to: 1 + esc_period: 0 + operationtype: remote command + opcommand: + command: 'ssh -i /etc/openshift_tools/scriptrunner_id_rsa {{ ozb_scriptrunner_user }}@{{ ozb_scriptrunner_bastion_host }} remote-healer --host \"{HOST.NAME}\" --trigger \"{TRIGGER.NAME}\" --trigger-val \"{TRIGGER.VALUE}\"' + execute_on: "zabbix server" + type: 'custom script' + target_hosts: + - target_type: 'zabbix server' + opconditions: + - conditiontype: 'event acknowledged' + operator: '=' + value: 'not acknowledged' |