diff options
Diffstat (limited to 'roles')
-rw-r--r-- | roles/openshift_master/tasks/main.yml | 11 | ||||
-rw-r--r-- | roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2 | 2 | ||||
-rw-r--r-- | roles/openshift_node/tasks/main.yml | 6 | ||||
-rw-r--r-- | roles/os_zabbix/tasks/main.yml | 13 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 8 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_zagg_server.yml | 36 |
6 files changed, 64 insertions, 12 deletions
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index bd3d8f90c..43647cc49 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -135,13 +135,11 @@ template: src: atomic-openshift-master-api.service.j2 dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-api.service - force: no when: openshift_master_ha | bool and openshift_master_cluster_method == "native" - name: Create the controllers service file template: src: atomic-openshift-master-controllers.service.j2 dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-controllers.service - force: no when: openshift_master_ha | bool and openshift_master_cluster_method == "native" - name: Create the api env file template: @@ -230,7 +228,7 @@ register: start_result - set_fact: - master_service_status_changed = start_result | changed + master_service_status_changed: start_result | changed when: not openshift_master_ha | bool - name: Start and enable master api @@ -239,19 +237,16 @@ register: start_result - set_fact: - master_api_service_status_changed = start_result | changed + master_api_service_status_changed: start_result | changed when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' -# TODO: fix the ugly workaround of setting ignore_errors -# the controllers service tries to start even if it is already started - name: Start and enable master controller service: name={{ openshift.common.service_type }}-master-controllers enabled=yes state=started when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' register: start_result - ignore_errors: yes - set_fact: - master_controllers_service_status_changed = start_result | changed + master_controllers_service_status_changed: start_result | changed when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' - name: Install cluster packages diff --git a/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2 b/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2 index 8952c86ef..ef0b57ef4 100644 --- a/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2 +++ b/roles/openshift_master/templates/atomic-openshift-master-controllers.service.j2 @@ -7,7 +7,7 @@ Before={{ openshift.common.service_type }}-node.service Requires=network.target [Service] -Type=notify +Type=simple EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master-controllers Environment=GOTRACEBACK=crash ExecStart=/usr/bin/openshift start master controllers --config=${CONFIG_FILE} $OPTIONS diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index eef7bec9a..38bffc2e5 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -85,11 +85,11 @@ docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') | oo_split() | union(['registry.access.redhat.com']) | difference(['']) }}" - when: openshift.common.deployment_type == 'enterprise' + when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise'] - set_fact: docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') | oo_split() | difference(['']) }}" - when: openshift.common.deployment_type != 'enterprise' + when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise'] - name: Add personal registries lineinfile: @@ -131,4 +131,4 @@ register: start_result - set_fact: - node_service_status_changed = start_result | changed + node_service_status_changed: start_result | changed diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index d0b307a3d..7552086d4 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -37,6 +37,9 @@ - include_vars: template_aws.yml tags: - aws +- include_vars: template_zagg_server.yml + tags: + - zagg_server - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -137,3 +140,13 @@ password: "{{ ozb_password }}" tags: - aws + +- name: Include Template Zagg Server + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_zagg_server }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + tags: + - zagg_server diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 514d6fd24..a0ba8d104 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -269,6 +269,14 @@ g_template_openshift_master: - 'Openshift Master process not running on {HOST.NAME}' priority: avg + - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}' + expression: '{Template Openshift Master:create_app.sum(1h)}>3' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' + description: The application create loop has failed 4 or more times in the last hour + priority: avg + - name: 'Openshift Master API health check is failing on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml new file mode 100644 index 000000000..0e8e53bb7 --- /dev/null +++ b/roles/os_zabbix/vars/template_zagg_server.yml @@ -0,0 +1,36 @@ +--- +g_template_zagg_server: + name: Template Zagg Server + zitems: + - key: zagg.server.metrics.count + applications: + - Zagg Server + value_type: int + + - key: zagg.server.processor.errors + applications: + - Zagg Server + value_type: int + + - key: zagg.server.heartbeat.count + applications: + - Zagg Server + value_type: int + + ztriggers: + - name: 'Error sending metrics on {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' + priority: average + + - name: 'Critically High number of metrics in Zagg queue {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>10000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' + priority: high + + - name: 'High number of metrics in Zagg queue {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>5000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' + dependencies: + - 'Critically High number of metrics in Zagg queue {HOST.NAME}' + priority: average |