summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/openshift_master/tasks/main.yml11
-rw-r--r--roles/openshift_master/templates/atomic-openshift-master-controllers.service.j22
-rw-r--r--roles/openshift_node/tasks/main.yml6
-rw-r--r--roles/os_zabbix/tasks/main.yml13
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml8
-rw-r--r--roles/os_zabbix/vars/template_zagg_server.yml36
6 files changed, 64 insertions, 12 deletions
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index bd3d8f90c..43647cc49 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -135,13 +135,11 @@
template:
src: atomic-openshift-master-api.service.j2
dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-api.service
- force: no
when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
- name: Create the controllers service file
template:
src: atomic-openshift-master-controllers.service.j2
dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-controllers.service
- force: no
when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
- name: Create the api env file
template:
@@ -230,7 +228,7 @@
register: start_result
- set_fact:
- master_service_status_changed = start_result | changed
+ master_service_status_changed: start_result | changed
when: not openshift_master_ha | bool
- name: Start and enable master api
@@ -239,19 +237,16 @@
register: start_result
- set_fact:
- master_api_service_status_changed = start_result | changed
+ master_api_service_status_changed: start_result | changed
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
-# TODO: fix the ugly workaround of setting ignore_errors
-# the controllers service tries to start even if it is already started
- name: Start and enable master controller
service: name={{ openshift.common.service_type }}-master-controllers enabled=yes state=started
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
register: start_result
- ignore_errors: yes
- set_fact:
- master_controllers_service_status_changed = start_result | changed
+ master_controllers_service_status_changed: start_result | changed
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
- name: Install cluster packages
diff --git a/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2 b/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2
index 8952c86ef..ef0b57ef4 100644
--- a/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2
+++ b/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2
@@ -7,7 +7,7 @@ Before={{ openshift.common.service_type }}-node.service
Requires=network.target
[Service]
-Type=notify
+Type=simple
EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
Environment=GOTRACEBACK=crash
ExecStart=/usr/bin/openshift start master controllers --config=${CONFIG_FILE} $OPTIONS
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index eef7bec9a..38bffc2e5 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -85,11 +85,11 @@
docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')
| oo_split() | union(['registry.access.redhat.com'])
| difference(['']) }}"
- when: openshift.common.deployment_type == 'enterprise'
+ when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']
- set_fact:
docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')
| oo_split() | difference(['']) }}"
- when: openshift.common.deployment_type != 'enterprise'
+ when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']
- name: Add personal registries
lineinfile:
@@ -131,4 +131,4 @@
register: start_result
- set_fact:
- node_service_status_changed = start_result | changed
+ node_service_status_changed: start_result | changed
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index d0b307a3d..7552086d4 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -37,6 +37,9 @@
- include_vars: template_aws.yml
tags:
- aws
+- include_vars: template_zagg_server.yml
+ tags:
+ - zagg_server
- name: Include Template Heartbeat
include: ../../lib_zabbix/tasks/create_template.yml
@@ -137,3 +140,13 @@
password: "{{ ozb_password }}"
tags:
- aws
+
+- name: Include Template Zagg Server
+ include: ../../lib_zabbix/tasks/create_template.yml
+ vars:
+ template: "{{ g_template_zagg_server }}"
+ server: "{{ ozb_server }}"
+ user: "{{ ozb_user }}"
+ password: "{{ ozb_password }}"
+ tags:
+ - zagg_server
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: avg
+ - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+ expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ description: The application create loop has failed 4 or more times in the last hour
+ priority: avg
+
- name: 'Openshift Master API health check is failing on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
new file mode 100644
index 000000000..0e8e53bb7
--- /dev/null
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -0,0 +1,36 @@
+---
+g_template_zagg_server:
+ name: Template Zagg Server
+ zitems:
+ - key: zagg.server.metrics.count
+ applications:
+ - Zagg Server
+ value_type: int
+
+ - key: zagg.server.processor.errors
+ applications:
+ - Zagg Server
+ value_type: int
+
+ - key: zagg.server.heartbeat.count
+ applications:
+ - Zagg Server
+ value_type: int
+
+ ztriggers:
+ - name: 'Error sending metrics on {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ priority: average
+
+ - name: 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>10000'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ priority: high
+
+ - name: 'High number of metrics in Zagg queue {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>5000'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ dependencies:
+ - 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+ priority: average