12 files changed, 1258 insertions, 31 deletions
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index 8347e9a61..1c8d88854 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -8,10 +8,42 @@
   register: templates
 
 - include_vars: template_heartbeat.yml
+  tags:  # every vars file carries the same tag as its "Include Template ..." task in this file
+  - heartbeat
 - include_vars: template_os_linux.yml
+  tags:
+  - linux
 - include_vars: template_docker.yml
+  tags:
+  - docker
 - include_vars: template_openshift_master.yml
+  tags:
+  - openshift_master
 - include_vars: template_openshift_node.yml
+  tags:
+  - openshift_node
+- include_vars: template_ops_tools.yml
+  tags:
+  - ops_tools
+- include_vars: template_app_zabbix_server.yml
+  tags:
+  - zabbix_server
+- include_vars: template_app_zabbix_agent.yml
+  tags:
+  - zabbix_agent
+- include_vars: template_performance_copilot.yml
+  tags:
+  - pcp  # short tag for the performance_copilot template
+- include_vars: template_aws.yml
+  tags:
+  - aws
+- include_vars: template_zagg_server.yml
+  tags:
+  - zagg_server
+
+- include_vars: template_config_loop.yml
+  tags:
+  - config_loop
 
 - name: Include Template Heartbeat
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -20,6 +52,8 @@
     server: "{{ ozb_server }}"
     user: "{{ ozb_user }}"
     password: "{{ ozb_password }}"
+  tags:
+  - heartbeat
 
 - name: Include Template os_linux
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -28,6 +62,8 @@
     server: "{{ ozb_server }}"
     user: "{{ ozb_user }}"
     password: "{{ ozb_password }}"
+  tags:
+  - linux
 
 - name: Include Template docker
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -36,6 +72,8 @@
     server: "{{ ozb_server }}"
     user: "{{ ozb_user }}"
     password: "{{ ozb_password }}"
+  tags:
+  - docker
 
 - name: Include Template Openshift Master
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -44,6 +82,8 @@
     server: "{{ ozb_server }}"
     user: "{{ ozb_user }}"
     password: "{{ ozb_password }}"
+  tags:
+  - openshift_master
 
 - name: Include Template Openshift Node
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -52,3 +92,75 @@
     server: "{{ ozb_server }}"
     user: "{{ ozb_user }}"
     password: "{{ ozb_password }}"
+  tags:
+  - openshift_node
+
+- name: Include Template Ops Tools
+  include: ../../lib_zabbix/tasks/create_template.yml  # shared task file; gets template/server/user/password via vars
+  vars:
+    template: "{{ g_template_ops_tools }}"  # presumably defined in vars/template_ops_tools.yml (not shown in this chunk)
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - ops_tools
+
+- name: Include Template App Zabbix Server
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_app_zabbix_server }}"  # defined in vars/template_app_zabbix_server.yml
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - zabbix_server
+
+- name: Include Template App Zabbix Agent
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_app_zabbix_agent }}"  # defined in vars/template_app_zabbix_agent.yml
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - zabbix_agent
+
+- name: Include Template Performance Copilot
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_performance_copilot }}"  # presumably defined in vars/template_performance_copilot.yml (not shown in this chunk)
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - pcp
+
+- name: Include Template AWS
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_aws }}"  # defined in vars/template_aws.yml
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - aws
+
+- name: Include Template Zagg Server
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_zagg_server }}"  # presumably defined in vars/template_zagg_server.yml (not shown in this chunk)
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - zagg_server
+
+- name: Include Template Config Loop
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_config_loop }}"  # defined in vars/template_config_loop.yml
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - config_loop
diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml
new file mode 100644
index 000000000..d636d4822
--- /dev/null
+++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml
@@ -0,0 +1,23 @@
+---
+g_template_app_zabbix_agent:  # passed as "template" to create_template.yml by the "Include Template App Zabbix Agent" task
+   name: Template App Zabbix Agent
+   zitems:
+   - key: agent.hostname
+     applications:
+     - Zabbix agent
+     value_type: character
+     zabbix_type: agent
+
+   - key: agent.ping  # watched by the nodata() trigger below
+     applications:
+     - Zabbix agent
+     description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check.
+     value_type: int
+     zabbix_type: agent
+
+   ztriggers:
+   - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes'
+     description: Zabbix agent is unreachable for 15 minutes.
+     expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1'  # fires when agent.ping has delivered no data for 15 minutes
+     priority: high
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc
diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml
new file mode 100644
index 000000000..43517113b
--- /dev/null
+++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml
@@ -0,0 +1,412 @@
+---
+g_template_app_zabbix_server:  # passed as "template" to create_template.yml by the "Include Template App Zabbix Server" task
+   name: Template App Zabbix Server
+   zitems:
+   - key: housekeeper_creates  # the four housekeeper_* items feed the two "Zabbix DB Housekeeping" triggers in this template
+     applications:
+     - Zabbix server
+     description: A simple count of the number of partition creates output by the housekeeper script.
+     units: ''
+     value_type: int
+     zabbix_type: internal  # NOTE(review): housekeeper_* are not zabbix[...] keys; confirm "internal" is the intended item type
+
+   - key: housekeeper_drops
+     applications:
+     - Zabbix server
+     description: A simple count of the number of partition drops output by the housekeeper script.
+     units: ''
+     value_type: int
+     zabbix_type: internal
+
+   - key: housekeeper_errors
+     applications:
+     - Zabbix server
+     description: A simple count of the number of errors output by the housekeeper script.
+     units: ''
+     value_type: int
+     zabbix_type: internal
+
+   - key: housekeeper_total
+     applications:
+     - Zabbix server
+     description: A simple count of the total number of lines output by the housekeeper
+       script.
+     units: ''
+     value_type: int
+     zabbix_type: internal
+
+   - key: zabbix[process,alerter,avg,busy]  # one item per server process type; each has a ">75% busy" trigger in this template
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,configuration syncer,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,db watchdog,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,discoverer,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,escalator,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,history syncer,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,housekeeper,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,http poller,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,icmp pinger,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,ipmi poller,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,java poller,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,node watcher,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,poller,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,proxy poller,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,self-monitoring,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,snmp trapper,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,timer,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,trapper,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[process,unreachable poller,avg,busy]
+     applications:
+     - Zabbix server
+     description: ''
+     units: '%'
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[queue,10m]  # read by the "More than 1000 items having missing data" trigger
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: int
+     zabbix_type: internal
+     interval: 600  # presumably seconds - TODO confirm against the zbx_item module
+
+   - key: zabbix[queue]
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: int
+     zabbix_type: internal
+     interval: 600
+
+   - key: zabbix[rcache,buffer,pfree]  # the three *,pfree cache items with triggers alert on low free percentage
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[wcache,history,pfree]
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[wcache,text,pfree]
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[wcache,trend,pfree]
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: float
+     zabbix_type: internal
+
+   - key: zabbix[wcache,values]
+     applications:
+     - Zabbix server
+     description: ''
+     units: ''
+     value_type: float
+     zabbix_type: internal
+     delta: 1  # speed per second
+
+   ztriggers:  # alert rules; every expression references an item key of "Template App Zabbix Server"
+   - description: "There has been unexpected output while running the housekeeping script\
+       \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\
+       \ and we've gotten something new.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\
+       \ for more details."
+     expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}'  # fires when errors + creates + drops differs from the total line count
+     name: Unexpected output in Zabbix DB Housekeeping
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc
+
+   - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details.
+     expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0'  # fires as soon as the latest housekeeper_errors value is above zero
+     name: Errors during Zabbix DB Housekeeping
+     priority: high
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc  # NOTE(review): the other housekeeping trigger links to Zabbix_DB_Housekeeping.asciidoc - confirm this SOP
+
+   - description: ''  # one trigger per zabbix[process,<type>,avg,busy] item, all sharing the same SOP URL
+     expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75'  # fires when the minimum over the 600 window (seconds per Zabbix syntax) exceeds 75
+     name: Zabbix alerter processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75'
+     name: Zabbix configuration syncer processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75'
+     name: Zabbix db watchdog processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75'
+     name: Zabbix discoverer processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75'
+     name: Zabbix escalator processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75'
+     name: Zabbix history syncer processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75'  # only busy trigger using a 1800 window instead of 600
+     name: Zabbix housekeeper processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75'
+     name: Zabbix http poller processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75'
+     name: Zabbix icmp pinger processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75'
+     name: Zabbix ipmi poller processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75'
+     name: Zabbix java poller processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75'
+     name: Zabbix node watcher processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75'
+     name: Zabbix poller processes more than 75% busy
+     priority: high  # the only process-busy trigger raised at "high"; the rest are "avg"
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75'
+     name: Zabbix proxy poller processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75'
+     name: Zabbix self-monitoring processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75'
+     name: Zabbix snmp trapper processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: Timer processes usually are busy because they have to process time
+       based trigger functions
+     expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75'
+     name: Zabbix timer processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75'
+     name: Zabbix trapper processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75'
+     name: Zabbix unreachable poller processes more than 75% busy
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
+
+   - description: "This alert generally indicates a performance problem or a problem\
+       \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\
+       \ is Administration > Queue. Be sure to check the general view and the per-proxy\
+       \ view."
+     expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000'  # fires when the 10m queue never dropped to 1000 or below within the window
+     name: More than 1000 items having missing data for more than 10 minutes
+     priority: high
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc
+
+   - description: Consider increasing CacheSize in the zabbix_server.conf configuration
+       file
+     expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5'  # cache triggers fire on LOW free percentage, hence "<"
+     name: Less than 5% free in the configuration cache
+     priority: info
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25'
+     name: Less than 25% free in the history cache
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25'
+     name: Less than 25% free in the text history cache
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
+
+   - description: ''
+     expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25'
+     name: Less than 25% free in the trends cache
+     priority: avg
+     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
diff --git a/roles/os_zabbix/vars/template_aws.yml b/roles/os_zabbix/vars/template_aws.yml
new file mode 100644
index 000000000..57832a3fe
--- /dev/null
+++ b/roles/os_zabbix/vars/template_aws.yml
@@ -0,0 +1,25 @@
+---
+g_template_aws:  # passed as "template" to create_template.yml by the "Include Template AWS" task
+  name: Template AWS
+  zdiscoveryrules:
+  - name: disc.aws
+    key: disc.aws
+    lifetime: 14  # presumably days - TODO confirm against the discovery-rule module
+    description: "Dynamically register AWS bucket info"
+
+  zitemprototypes:
+  - discoveryrule_key: disc.aws  # matches the key of the discovery rule declared above
+    name: "S3 bucket size (GB) [{#S3_BUCKET}]"
+    key: "disc.aws.size[{#S3_BUCKET}]"
+    value_type: int
+    description: "Size of S3 bucket"
+    applications:
+    - AWS
+
+  - discoveryrule_key: disc.aws
+    name: "S3 bucket object count [{#S3_BUCKET}]"
+    key: "disc.aws.objects[{#S3_BUCKET}]"
+    value_type: int
+    description: "Objects in S3 bucket"
+    applications:
+    - AWS
diff --git a/roles/os_zabbix/vars/template_config_loop.yml b/roles/os_zabbix/vars/template_config_loop.yml
new file mode 100644
index 000000000..823da1868
--- /dev/null
+++ b/roles/os_zabbix/vars/template_config_loop.yml
@@ -0,0 +1,14 @@
+---
+g_template_config_loop:  # passed as "template" to create_template.yml by the "Include Template Config Loop" task
+  name: Template Config Loop
+  zitems:
+  - key: config_loop.run.exit_code
+    applications:
+    - Config Loop
+    value_type: int
+
+  ztriggers:
+  - name: 'config_loop.run.exit_code not zero on {HOST.NAME}'
+    expression: '{Template Config Loop:config_loop.run.exit_code.min(#2)}>0'  # fires when the two most recent exit codes are both above zero
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_config_loop.asciidoc'
+    priority: average  # NOTE(review): the Zabbix server template in this change spells this "avg" - confirm both are accepted
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index 395e054de..dd13e76f7 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -7,6 +7,21 @@ g_template_docker:
     - Docker Daemon
     value_type: int
 
+  - key: docker.info_elapsed_ms  # presumably the elapsed time of a "docker info" call in ms - TODO confirm with the sender script
+    applications:
+    - Docker Daemon
+    value_type: int
+
+  - key: docker.container.dns.resolution  # read by the (currently disabled) 'docker.container.dns.resolution failed' trigger
+    applications:
+    - Docker Daemon
+    value_type: int
+
+  - key: docker.container.existing.dns.resolution.failed  # read by the trigger of the same name
+    applications:
+    - Docker Daemon
+    value_type: int
+
   - key: docker.storage.is_loopback
     applications:
     - Docker Storage
@@ -57,6 +72,18 @@ g_template_docker:
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
     priority: high
 
+  # Re-enable for OpenShift 3.1.1 (https://bugzilla.redhat.com/show_bug.cgi?id=1292971#c6)
+  - name: 'docker.container.dns.resolution failed on {HOST.NAME}'
+    expression: '{Template Docker:docker.container.dns.resolution.min(#3)}>0'  # would fire when the last three values are all above zero
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
+    priority: average
+    status: disabled  # disabled on purpose; see the bugzilla reference above this trigger
+
+  - name: 'docker.container.existing.dns.resolution.failed on {HOST.NAME}'
+    expression: '{Template Docker:docker.container.existing.dns.resolution.failed.min(#3)}>0'  # fires when the last three values are all above zero
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
+    priority: average
+
   - name: 'Docker storage is using LOOPBACK on {HOST.NAME}'
     expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index c71e07910..e36f23a2b 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -2,10 +2,10 @@
 g_template_openshift_master:
   name: Template Openshift Master
   zitems:
-  - name: create_app
+  - name: openshift.master.app.create
     applications:
     - Openshift Master
-    key: create_app
+    key: openshift.master.app.create  # renamed from create_app; the "Application creation has failed" triggers reference this key
 
   - key: openshift.master.process.count
     description: Shows number of master processes running
@@ -13,12 +13,255 @@ g_template_openshift_master:
     applications:
     - Openshift Master
 
-  ztriggers:
-  - name: 'Application creation has failed on {HOST.NAME}'
-    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
-    priority: avg
+  - key: openshift.master.api.ping  # ping/healthz come in pairs: cluster API URL and local 127.0.0.1
+    description: "Verify that the Openshift API is up (uses the cluster API URL)"
+    type: int  # NOTE(review): the docker/zabbix templates in this change use value_type - confirm which key create_template.yml reads
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.local.api.ping
+    description: "Verify that the Openshift API is up on the host (uses the API URL as the https://127.0.0.1)"
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.api.healthz
+    description: "Checks the healthz check of the master's api: https://<cluster_api_url>/healthz"
+    type: int
+    data_type: bool  # only the two healthz items set data_type
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.local.api.healthz
+    description: "Checks the healthz check of the master's api: https://127.0.0.1/healthz"
+    type: int
+    data_type: bool
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.user.count  # read by the 'Number of users for Openshift Master' trigger
+    description: Shows number of users in a cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pod.running.count
+    description: Shows number of pods running
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pod.user.running.count
+    description: Shows number of user pods running (non infrastructure pods)
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pod.total.count
+    description: Shows total number of pods (running and non running)
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.node.count
+    description: Shows the total number of nodes found in the Openshift Cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.project.count  # no "master." segment in this key; the 'There are no projects running' trigger uses it as written
+    description: Shows number of projects on a cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pv.total.count  # Persistent Volume counters: total, then one item per state
+    description: Total number of Persistent Volumes in the Openshift Cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pv.available.count
+    description: Total number of Available Persistent Volumes in the Openshift Cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pv.released.count
+    description: Total number of Released Persistent Volumes in the Openshift Cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pv.bound.count
+    description: Total number of Bound Persistent Volumes in the Openshift Cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.pv.failed.count
+    description: Total number of Failed Persistent Volumes in the Openshift Cluster
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.skydns.port.open  # SkyDNS checks: port listening, then an actual query
+    description: State of the SkyDNS port open and listening
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.skydns.query
+    description: SkyDNS can be queried or not
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.etcd.create.success  # etcd action counters come as success/fail pairs under the "Openshift Etcd" application
+    description: Show number of successful create actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.create.fail
+    description: Show number of failed create actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.delete.success
+    description: Show number of successful delete actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.delete.fail
+    description: Show number of failed delete actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.get.success
+    description: Show number of successful get actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.get.fail
+    description: Show number of failed get actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.set.success
+    description: Show number of successful set actions
+    type: int
+    applications:
+    - Openshift Etcd
 
+  - key: openshift.master.etcd.set.fail  # counterpart of the etcd.set.success item
+    description: Show number of failed set actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.update.success
+    description: Show number of successful update actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.update.fail
+    description: Show number of failed update actions
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.watchers
+    description: Show number of etcd watchers
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.etcd.ping  # read by the 'Etcd ping failed' trigger, which alerts on value 0
+    description: etcd ping
+    type: int
+    applications:
+    - Openshift Etcd
+
+  - key: openshift.master.metric.ping  # first item in the "Openshift Master Metrics" application
+    description: "This check verifies that the https://master/metrics check is alive and communicating properly."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.nodesnotready.count  # cluster-wide node state counters (NotReady / not schedulable)
+    description: "This check shows how many nodes in a cluster are in NotReady state."
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.nodesnotschedulable.count
+    description: "This check shows how many nodes in a cluster are not schedulable."
+    type: int
+    applications:
+    - Openshift Master
+
+  - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5  # key suffix .5/.9/.99 = 50%/90%/99% quantile, per the descriptions
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 50% of the pod operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 90% of the pod operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 99% of the pod operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5  # NOTE(review): descriptions match the .list.* items word for word - confirm they should not mention watch-list operations
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 50% of the pod operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 90% of the pod operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 99% of the pod operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5  # key suffix .5/.9/.99 = 50%/90%/99% quantile, per the descriptions
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 50% of the end to end scheduling operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 90% of the end to end scheduling operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99
+    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 99% of the end to end scheduling operations have taken to complete."
+    type: int
+    applications:
+    - Openshift Master Metrics
+
+  ztriggers:
   - name: 'Openshift Master process not running on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
@@ -28,3 +271,120 @@ g_template_openshift_master:
     expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
     priority: high
+
+  - name: 'Etcd ping failed on {HOST.NAME}'
+    priority: high  # raised when the two most recent etcd ping values are both 0
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+
+  - name: 'Number of users for Openshift Master on {HOST.NAME}'
+    priority: info  # raised when the latest user count is 0
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
+
+  - name: 'There are no projects running on {HOST.NAME}'
+    priority: info  # raised when the latest project count is 0
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.project.count.last()}=0'
+
+  # Triggers that depend on other triggers belong below; a dependency must be created before its dependent
+  - name: 'Application creation has failed on {HOST.NAME}'
+    priority: avg
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.app.create.last(#1)}=1 and {Template Openshift Master:openshift.master.app.create.last(#2)}=1'
+
+  - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+    priority: avg
+    description: The application create loop has failed 4 or more times in the last hour
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.app.create.sum(1h)}>3'
+
+  - name: 'Openshift Master API health check is failing on {HOST.NAME}'
+    priority: high
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
+
+  - name: 'Openshift Master Local API health check is failing on {HOST.NAME}'
+    priority: high
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.local.api.healthz.max(#3)}<1'
+
+  - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
+    priority: high
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
+
+  - name: 'Openshift Master Local API PING check is failing on {HOST.NAME}'
+    priority: high
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.local.api.ping.max(#3)}<1'
+
+  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
+    priority: avg
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
+
+  - name: 'SkyDNS port not listening on {HOST.NAME}'
+    priority: high
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.skydns.port.open.max(#3)}<1'
+
+  - name: 'SkyDNS query failed on {HOST.NAME}'
+    priority: high
+    dependencies:
+    - 'Openshift Master API health check is failing on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    expression: '{Template Openshift Master:openshift.master.skydns.query.max(#3)}<1'
+
+  - name: 'Hosts not ready according to {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.nodesnotready.count.last(#2)}>0'  # NOTE(review): only last(#2) is tested, not last(#1) - confirm intent
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    priority: high
+
+  zgraphs:  # three 900x200 graphs, each plotting the .5/.9/.99 quantile items of one metric
+  - name: Openshift Master API Server Latency Pods LIST Quantiles
+    height: 200
+    width: 900
+    graph_items:
+    - color: red
+      item_name: openshift.master.apiserver.latency.summary.pods.quantile.list.5
+    - color: blue
+      item_name: openshift.master.apiserver.latency.summary.pods.quantile.list.9
+    - color: orange
+      item_name: openshift.master.apiserver.latency.summary.pods.quantile.list.99
+
+  - name: Openshift Master API Server Latency Pods WATCHLIST Quantiles
+    height: 200
+    width: 900
+    graph_items:
+    - color: red
+      item_name: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5
+    - color: blue
+      item_name: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9
+    - color: orange
+      item_name: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99
+
+  - name: Openshift Master Scheduler End to End Latency Quantiles
+    height: 200
+    width: 900
+    graph_items:
+    - color: red
+      item_name: openshift.master.scheduler.e2e.scheduling.latency.quantile.5
+    - color: blue
+      item_name: openshift.master.scheduler.e2e.scheduling.latency.quantile.9
+    - color: orange
+      item_name: openshift.master.scheduler.e2e.scheduling.latency.quantile.99
diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml
index 36f9cc4a3..66bd3a147 100644
--- a/roles/os_zabbix/vars/template_openshift_node.yml
+++ b/roles/os_zabbix/vars/template_openshift_node.yml
@@ -8,13 +8,63 @@ g_template_openshift_node:
     applications:
     - Openshift Node
 
+  - key: openshift.node.ovs.pids.count
+    type: int
+    applications:
+    - Openshift Node
+    description: 'Shows number of ovs process ids running'
+
+  - key: openshift.node.ovs.ports.count
+    type: int
+    applications:
+    - Openshift Node
+    description: 'Shows number of OVS ports defined'
+
+  - key: openshift.node.ovs.stray.rules
+    type: int
+    applications:
+    - Openshift Node
+    description: 'Number of OVS stray rules found/removed'
+
+  - key: openshift.node.registry-pods.healthy_pct
+    type: int
+    applications:
+    - Openshift Node
+    description: 'Shows the percentage of healthy registries in the cluster'
+
+  - key: openshift.node.registry.service.ping
+    type: int
+    applications:
+    - Openshift Node
+    description: 'Ping docker-registry service from node'
+
   ztriggers:
+  - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}'
+    expression: '{Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#2)}<100 and {Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#1)}<100'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
+    priority: avg
+
+  - name: 'Docker Registry service is unhealthy according to {HOST.NAME}'
+    expression: '{Template Openshift Node:openshift.node.registry.service.ping.last(#2)}<1 and {Template Openshift Node:openshift.node.registry.service.ping.last(#1)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
+    priority: avg
+
   - name: 'Openshift Node process not running on {HOST.NAME}'
     expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
     priority: high
 
   - name: 'Too many Openshift Node processes running on {HOST.NAME}'
     expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+    priority: high
+
+  - name: '[Heal] OVS may not be running on {HOST.NAME}'  # "[Heal]" in the name is what the remote-command action in template_ops_tools.yml matches on
+    expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last(#1)}<>4 and {Template Openshift Node:openshift.node.ovs.pids.count.last(#2)}<>4'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+    priority: high
+
+  - name: 'Number of OVS ports is 0 on {HOST.NAME}'
+    expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
     priority: high
diff --git a/roles/os_zabbix/vars/template_ops_tools.yml b/roles/os_zabbix/vars/template_ops_tools.yml
new file mode 100644
index 000000000..a0a5a4d03
--- /dev/null
+++ b/roles/os_zabbix/vars/template_ops_tools.yml
@@ -0,0 +1,54 @@
+---
+g_template_ops_tools:
+  name: Template Operations Tools
+  zdiscoveryrules:
+  - key: disc.ops.runner
+    name: disc.ops.runner
+    description: 'Dynamically register operations runner items'
+    lifetime: 1
+
+  zitemprototypes:
+  - name: 'Exit code of ops-runner[{#OSO_COMMAND}]'
+    key: 'disc.ops.runner.command.exitcode[{#OSO_COMMAND}]'
+    discoveryrule_key: disc.ops.runner  # same key as the discovery rule defined above
+    description: 'The exit code of the command run from ops-runner'
+    value_type: int
+    applications:
+    - Ops Runner
+
+  ztriggerprototypes:
+  - name: 'ops-runner[{#OSO_COMMAND}]: non-zero exit code on {HOST.NAME}'
+    priority: average
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_ops_runner_command.asciidoc'
+    expression: '{Template Operations Tools:disc.ops.runner.command.exitcode[{#OSO_COMMAND}].last()}<>0'
+
+  zactions:
+  - name: 'Remote command for [Heal] triggers'
+    escalation_time: 60
+    status: enabled
+    conditions_filter:
+      calculation_type: 'and/or'
+      conditions:
+      - operator: not in
+        conditiontype: maintenance status
+      - operator: like
+        conditiontype: trigger name
+        value: '[Heal]'  # selects triggers whose name contains "[Heal]"
+      - operator: '='
+        conditiontype: trigger value
+        value: PROBLEM
+    operations:
+    - operationtype: remote command
+      esc_period: 0
+      esc_step_from: 1
+      esc_step_to: 1
+      opcommand:
+        type: 'custom script'
+        execute_on: 'zabbix server'
+        command: 'ssh -i /etc/openshift_tools/scriptrunner_id_rsa {{ ozb_scriptrunner_user }}@{{ ozb_scriptrunner_bastion_host }} remote-healer --host \"{HOST.NAME}\" --trigger \"{TRIGGER.NAME}\" --trigger-val \"{TRIGGER.VALUE}\"'
+      target_hosts:
+      - target_type: 'zabbix server'
+      opconditions:
+      - conditiontype: 'event acknowledged'
+        value: 'not acknowledged'
+        operator: '='
diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml
index 3173c79b2..c6e557f12 100644
--- a/roles/os_zabbix/vars/template_os_linux.yml
+++ b/roles/os_zabbix/vars/template_os_linux.yml
@@ -10,17 +10,20 @@ g_template_os_linux:
   - key: kernel.all.cpu.wait.total
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.all.cpu.irq.hard
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.all.cpu.idle
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.uname.distro
     applications:
@@ -35,7 +38,8 @@ g_template_os_linux:
   - key: kernel.all.cpu.irq.soft
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.all.load.15_minute
     applications:
@@ -45,7 +49,8 @@ g_template_os_linux:
   - key: kernel.all.cpu.sys
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.all.load.5_minute
     applications:
@@ -55,7 +60,8 @@ g_template_os_linux:
   - key: kernel.all.cpu.nice
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.all.load.1_minute
     applications:
@@ -75,7 +81,8 @@ g_template_os_linux:
   - key: kernel.all.cpu.user
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.uname.machine
     applications:
@@ -90,7 +97,8 @@ g_template_os_linux:
   - key: kernel.all.cpu.steal
     applications:
     - Kernel
-    value_type: int
+    value_type: float
+    units: '%'
 
   - key: kernel.all.pswitch
     applications:
@@ -180,38 +188,105 @@ g_template_os_linux:
     multiplier: 1024
     units: B
 
-  # Disk items
-  - key: filesys.full.xvda2
+  zdiscoveryrules:  # one rule each for filesystems, disks and network interfaces
+  - key: disc.filesys
+    name: disc.filesys
+    description: 'Dynamically register the filesystems'
+    lifetime: 1
+
+  - key: disc.disk
+    name: disc.disk
+    description: 'Dynamically register disks on a node'
+    lifetime: 1
+
+  - key: disc.network
+    name: disc.network
+    description: 'Dynamically register network interfaces on a node'
+    lifetime: 1
+
+  zitemprototypes:  # each prototype names its discovery rule via discoveryrule_key and embeds that rule's {#OSO_*} macro in its key
+  - discoveryrule_key: disc.filesys
+    name: "disc.filesys.full.{#OSO_FILESYS}"
+    key: "disc.filesys.full[{#OSO_FILESYS}]"
+    value_type: float
+    description: "PCP filesys.full option.  This is the percent full returned from pcp filesys.full"
     applications:
     - Disk
+
+  - discoveryrule_key: disc.filesys
+    name: "Percentage of used inodes on {#OSO_FILESYS}"
+    key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]"
     value_type: float
+    description: "PCP derived value of percentage of used inodes on a filesystem."
+    applications:
+    - Disk
 
-  - key: filesys.full.xvda3
+  - discoveryrule_key: disc.disk
+    name: "TPS (IOPS) for disk {#OSO_DISK}"
+    key: "disc.disk.tps[{#OSO_DISK}]"
+    value_type: int
+    description: "PCP disk.dev.totals metric measured over a period of time.  This shows how many disk transactions per second the disk is using"
     applications:
     - Disk
+
+  - discoveryrule_key: disc.disk
+    name: "Percent Utilized for disk {#OSO_DISK}"
+    key: "disc.disk.putil[{#OSO_DISK}]"
     value_type: float
+    description: "PCP disk.dev.avactive metric measured over a period of time.  This is the '%util' in the iostat command"
+    applications:
+    - Disk
 
-  ztriggers:
-  - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
-    expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
-    priority: warn
+  - discoveryrule_key: disc.network
+    name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}"
+    key: "disc.network.in.bytes[{#OSO_NET_INTERFACE}]"
+    value_type: int
+    units: B
+    delta: 1  # per the description below: stored as a delta so the item reads as a per-second rate
+    description: "PCP network.interface.in.bytes metric.  This is setup as a delta in Zabbix to measure the speed per second"
+    applications:
+    - Network
 
-  - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
-    expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
+  - discoveryrule_key: disc.network
+    name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}"
+    key: "disc.network.out.bytes[{#OSO_NET_INTERFACE}]"
+    value_type: int
+    units: B
+    delta: 1  # per the description below: stored as a delta so the item reads as a per-second rate
+    description: "PCP network.interface.out.bytes metric.  This is setup as a delta in Zabbix to measure the speed per second"
+    applications:
+    - Network
+
+  ztriggerprototypes:  # filesystem space and inode alerts, one set per discovered {#OSO_FILESYS}
+  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
+    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
     priority: high
 
-  - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
-    expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
+  # This trigger prototype depends on the previous one (the 10% free disk space prototype).
+  # Trigger prototype dependencies do not work in Zabbix 2.4; they are supported from Zabbix 3.0.
+  - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
+    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
     priority: warn
+    dependencies:
+    - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
 
-  - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
-    expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
+  - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
+    expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
     priority: high
 
+  # This trigger prototype depends on the previous one (the 5% free inodes prototype).
+  # Trigger prototype dependencies do not work in Zabbix 2.4; they are supported from Zabbix 3.0.
+  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
+    expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
+    priority: warn
+    dependencies:
+    - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
+
+  ztriggers:
   - name: 'Too many TOTAL processes on {HOST.NAME}'
     expression: '{Template OS Linux:proc.nprocs.last()}>5000'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
@@ -222,3 +297,18 @@ g_template_os_linux:
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
     priority: warn
     description: 'Alert on less than 30MegaBytes.  This is 30 Million Bytes.  30000 KB x 1024'
+
+  # CPU utilization: the 10% trigger depends on the 5% trigger, which is therefore listed first
+  - name: 'CPU idle less than 5% on {HOST.NAME}'
+    priority: average
+    description: 'CPU is less than 5% idle'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
+    expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5'
+
+  - name: 'CPU idle less than 10% on {HOST.NAME}'
+    priority: average
+    description: 'CPU is less than 10% idle'
+    dependencies:
+    - 'CPU idle less than 5% on {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
+    expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10'
diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml
new file mode 100644
index 000000000..b62fa0228
--- /dev/null
+++ b/roles/os_zabbix/vars/template_performance_copilot.yml
@@ -0,0 +1,14 @@
+---
+g_template_performance_copilot:
+  name: Template Performance Copilot
+  zitems:
+  - key: pcp.ping
+    value_type: int
+    applications:
+    - Performance Copilot
+
+  ztriggers:
+  - name: 'pcp.ping failed on {HOST.NAME}'
+    priority: average  # raised when the max of the last 3 pcp.ping values is below 1
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc'
+    expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
new file mode 100644
index 000000000..db5665993
--- /dev/null
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -0,0 +1,46 @@
+---
+g_template_zagg_server:
+  name: Template Zagg Server
+  zitems:
+  - key: zagg.server.metrics.count
+    value_type: int
+    applications:
+    - Zagg Server
+
+  - key: zagg.server.metrics.errors
+    value_type: int
+    applications:
+    - Zagg Server
+
+  - key: zagg.server.heartbeat.errors
+    value_type: int
+    applications:
+    - Zagg Server
+
+  - key: zagg.server.heartbeat.count
+    value_type: int
+    applications:
+    - Zagg Server
+
+  ztriggers:
+  - name: 'Error processing metrics on {HOST.NAME}'
+    priority: average
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0'
+
+  - name: 'Error processing heartbeats on {HOST.NAME}'
+    priority: average
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0'
+
+  - name: 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+    priority: high  # listed before the 5000 trigger, which names it as a dependency
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>10000'
+
+  - name: 'High number of metrics in Zagg queue {HOST.NAME}'
+    priority: average
+    dependencies:
+    - 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>5000'