---
# Zabbix template data for OpenShift node hosts: the monitored items
# (zitems), the triggers evaluated over them (ztriggers) and one auto-heal
# action (zactions).
#
# Trigger expressions refer to the template by its `name` value
# ("Template Openshift Node"), so the two must be kept in sync.
g_template_openshift_node:
  name: Template Openshift Node

  # All items are integers filed under the "Openshift Node" application.
  zitems:
    - key: openshift.node.process.count
      description: Shows number of OpenShift Node processes running
      type: int
      applications:
        - Openshift Node

    - key: openshift.node.ovs.pids.count
      description: Shows number of ovs process ids running
      type: int
      applications:
        - Openshift Node

    - key: openshift.node.ovs.ports.count
      description: Shows number of OVS ports defined
      type: int
      applications:
        - Openshift Node

    - key: openshift.node.ovs.stray.rules
      description: Number of OVS stray rules found/removed
      type: int
      applications:
        - Openshift Node

    - key: openshift.node.registry-pods.healthy_pct
      description: Shows the percentage of healthy registries in the cluster
      type: int
      applications:
        - Openshift Node

    - key: openshift.node.registry.service.ping
      description: Ping docker-registry service from node
      type: int
      applications:
        - Openshift Node

  # Expressions stay single-quoted: they start with `{` and contain `#`,
  # both of which are YAML indicators in a plain scalar.
  ztriggers:
    # The two most recent healthy_pct values are both below 100.
    - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#2)}<100 and {Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#1)}<100'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
      priority: avg

    # The two most recent service ping values are both below 1.
    - name: 'Docker Registry service is unhealthy according to {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.registry.service.ping.last(#2)}<1 and {Template Openshift Node:openshift.node.registry.service.ping.last(#1)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
      priority: avg

    # The highest of the last three process counts is below 1.
    - name: 'Openshift Node process not running on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

    # The lowest of the last three process counts is above 1.
    - name: 'Too many Openshift Node processes running on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

    # The two most recent OVS pid counts both differ from 4.
    - name: '[HEAL] OVS may not be running on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last(#1)}<>4 and {Template Openshift Node:openshift.node.ovs.pids.count.last(#2)}<>4'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

    # The latest OVS port count is exactly 0.
    - name: 'Number of OVS ports is 0 on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

  zactions:
    # Auto-heal action paired with the "[HEAL] OVS may not be running"
    # trigger above; defined here with status "disabled".
    - name: '[HEAL] OVS may not be running on {HOST.NAME}'
      status: disabled
      escalation_time: 60
      conditions_filter:
        calculation_type: 'and/or'
        conditions:
          - conditiontype: maintenance status
            operator: not in
          # Matches on the trigger name without the trailing host macro.
          - conditiontype: trigger name
            operator: like
            value: '[HEAL] OVS may not be running on'
          - conditiontype: trigger value
            operator: '='
            value: PROBLEM
      # NOTE(review): the nesting below was reconstructed from a flattened
      # source; confirm that target_hosts and opconditions are meant to be
      # operation-level keys (siblings of opcommand).
      operations:
        - esc_step_from: 1
          esc_step_to: 1
          esc_period: 0
          operationtype: remote command
          opcommand:
            # Invokes remote-healer over ssh with the host name, trigger name
            # and trigger value macros. The scalar is single-quoted, so each
            # \" is a literal backslash followed by a double quote. The
            # {{ ... }} placeholders are presumably filled in by the
            # templating layer that loads this file - TODO confirm.
            command: 'ssh -i /etc/openshift_tools/scriptrunner_id_rsa {{ ozb_scriptrunner_user }}@{{ ozb_scriptrunner_bastion_host }} remote-healer --host \"{HOST.NAME}\" --trigger \"{TRIGGER.NAME}\" --trigger-val \"{TRIGGER.VALUE}\"'
            execute_on: 'zabbix server'
            type: 'custom script'
          target_hosts:
            - target_type: 'zabbix server'
          # Restricts the operation to events that are not acknowledged.
          opconditions:
            - conditiontype: 'event acknowledged'
              operator: '='
              value: 'not acknowledged'