From d0b167bd075eda5ffa104229103dfad772e9f403 Mon Sep 17 00:00:00 2001 From: Chengcheng Mu Date: Tue, 18 Aug 2015 10:46:23 +0200 Subject: fixed some issues to boot VM on GCE corrected openshift master config, commented infra correct list and terminate, it was bugged in case where no instance were terminated Using openshift-sdn for gce new join_node playbook for gce openstack/hosts/nova.py is now taking the nova/ini of its directory and not the directory of execution of bin/cluster add fix of ICMP reject rules Avoid a recursive loop Jenkins image was renamed Default masters to t2.medium instead of t2.small Fix a minor bug involving AWS ENV Keys * If a user forgot to set their AWS keys, we'd get a non descriptive error about a variable not being set * This patch uses the correct variable so the error message is more informative delete some fix that are needed anymore (selinux, iptables rules for sdn) GCE : all variables needed are in gce.ini, it will be used by bin/cluster (now check better the presence of gce.init in the default place or use GCE_INI_PATH to locate it ), also by gce.ini openshift_node_labels : get from oo_option fix syntax error in bin/cluster fix lookup for openshift_node_labels Adding desc, multiplier, and units to zabbix item Adding capability to have descriptions on triggers updated triggers and items to have better descriptions and multipliers Move openshift_data_dir to a fact based on deployment_type Previously this was being set to /var/lib/origin regardless of deployment_type which isn't correct given that existing 'enterprise' and 'online' deployments would have been deployed with /var/lib/openshift Verify again that ansible version is different than 1.9.0 and 1.9.0.1 bin/cluste does not take -a and -s anymore fix master_public_api_url : using by default a correct url Really fixed master public api url this time Really fixed master public api url this time uncommented infra deployment like before fixed again masterpublicurl in a template README_GCE.md : use GCE_INI_PATH in order to locate gce.ini, update description of gce.ini --- roles/lib_zabbix/library/zbx_item.py | 33 +++++- roles/lib_zabbix/library/zbx_trigger.py | 8 +- roles/lib_zabbix/tasks/create_template.yml | 8 +- roles/openshift_common/vars/main.yml | 2 - .../image-streams/image-streams-centos7.json | 14 +-- .../image-streams/image-streams-rhel7.json | 4 +- .../jenkins-ephemeral-template.json | 7 +- .../jenkins-persistent-template.json | 7 +- roles/openshift_facts/library/openshift_facts.py | 9 ++ roles/openshift_facts/tasks/main.yml | 2 +- roles/openshift_manage_node/tasks/main.yml | 2 +- roles/openshift_master/tasks/main.yml | 11 +- roles/openshift_master/templates/master.yaml.v1.j2 | 2 +- roles/openshift_master/vars/main.yml | 2 +- roles/openshift_master_ca/vars/main.yml | 2 +- roles/openshift_node/tasks/main.yml | 8 +- roles/openshift_node/templates/node.yaml.v1.j2 | 2 +- roles/openshift_node/vars/main.yml | 2 +- roles/os_zabbix/vars/template_docker.yml | 12 +- roles/os_zabbix/vars/template_heartbeat.yml | 2 +- roles/os_zabbix/vars/template_openshift_master.yml | 2 +- roles/os_zabbix/vars/template_os_linux.yml | 124 +++++++++++++-------- 22 files changed, 181 insertions(+), 84 deletions(-) (limited to 'roles') diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py index 388db31b9..11e3c7b2b 100644 --- a/roles/lib_zabbix/library/zbx_item.py +++ b/roles/lib_zabbix/library/zbx_item.py @@ -88,6 +88,23 @@ def get_template_id(zapi, template_name): return template_ids, app_ids +def get_multiplier(inval): + ''' Determine the multiplier + ''' + if inval == None or inval == '': + return None, None + + rval = None + try: + rval = int(inval) + except ValueError: + pass + + if rval: + return rval, True + + return rval, False + # The branches are needed for CRUD and error handling # pylint: disable=too-many-branches def main(): @@ -106,6 +123,9 @@ def main(): template_name=dict(default=None, type='str'), zabbix_type=dict(default=2, type='int'), value_type=dict(default='int', type='str'), + multiplier=dict(default=None, type='str'), + description=dict(default=None, type='str'), + units=dict(default=None, type='str'), applications=dict(default=None, type='list'), state=dict(default='present', type='str'), ), @@ -137,11 +157,15 @@ def main(): 'templateids': templateid, }) - # Get + #******# + # GET + #******# if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") - # Delete + #******# + # DELETE + #******# if state == 'absent': if not exists(content): module.exit_json(changed=False, state="absent") @@ -152,12 +176,17 @@ def main(): # Create and Update if state == 'present': + formula, use_multiplier = get_multiplier(module.params['multiplier']) params = {'name': module.params.get('name', module.params['key']), 'key_': module.params['key'], 'hostid': templateid[0], 'type': module.params['zabbix_type'], 'value_type': get_value_type(module.params['value_type']), 'applications': get_app_ids(module.params['applications'], app_name_ids), + 'formula': formula, + 'multiplier': use_multiplier, + 'description': module.params['description'], + 'units': module.params['units'], } # Remove any None valued params diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py index c384f6fa3..a05de7e68 100644 --- a/roles/lib_zabbix/library/zbx_trigger.py +++ b/roles/lib_zabbix/library/zbx_trigger.py @@ -98,6 +98,7 @@ def main(): zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'), zbx_debug=dict(default=False, type='bool'), expression=dict(default=None, type='str'), + name=dict(default=None, type='str'), description=dict(default=None, type='str'), dependencies=dict(default=[], type='list'), priority=dict(default='avg', type='str'), @@ -116,11 +117,11 @@ def main(): zbx_class_name = 'trigger' idname = "triggerid" state = module.params['state'] - description = module.params['description'] + tname = module.params['name'] content = zapi.get_content(zbx_class_name, 'get', - {'filter': {'description': description}, + {'filter': {'description': tname}, 'expandExpression': True, 'selectDependencies': 'triggerid', }) @@ -138,7 +139,8 @@ def main(): # Create and Update if state == 'present': - params = {'description': description, + params = {'description': tname, + 'comments': module.params['description'], 'expression': module.params['expression'], 'dependencies': get_deps(zapi, module.params['dependencies']), 'priority': get_priority(module.params['priority']), diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml index bc9aff997..fd0cdd46f 100644 --- a/roles/lib_zabbix/tasks/create_template.yml +++ b/roles/lib_zabbix/tasks/create_template.yml @@ -30,6 +30,9 @@ key: "{{ item.key }}" name: "{{ item.name | default(item.key, true) }}" value_type: "{{ item.value_type | default('int') }}" + description: "{{ item.description | default('', True) }}" + multiplier: "{{ item.multiplier | default('', True) }}" + units: "{{ item.units | default('', True) }}" template_name: "{{ template.name }}" applications: "{{ item.applications }}" with_items: template.zitems @@ -41,8 +44,9 @@ zbx_server: "{{ server }}" zbx_user: "{{ user }}" zbx_password: "{{ password }}" - description: "{{ item.description }}" - dependencies: "{{ item.dependencies | default([], true) }}" + name: "{{ item.name }}" + description: "{{ item.description | default('', True) }}" + dependencies: "{{ item.dependencies | default([], True) }}" expression: "{{ item.expression }}" priority: "{{ item.priority }}" url: "{{ item.url | default(None, True) }}" diff --git a/roles/openshift_common/vars/main.yml b/roles/openshift_common/vars/main.yml index 817fe0a5f..50816d319 100644 --- a/roles/openshift_common/vars/main.yml +++ b/roles/openshift_common/vars/main.yml @@ -5,5 +5,3 @@ # chains with the public zone (or the zone associated with the correct # interfaces) os_firewall_use_firewalld: False - -openshift_data_dir: /var/lib/origin diff --git a/roles/openshift_examples/files/examples/image-streams/image-streams-centos7.json b/roles/openshift_examples/files/examples/image-streams/image-streams-centos7.json index 03affbddf..f213d99ca 100644 --- a/roles/openshift_examples/files/examples/image-streams/image-streams-centos7.json +++ b/roles/openshift_examples/files/examples/image-streams/image-streams-centos7.json @@ -161,19 +161,19 @@ "creationTimestamp": null }, "spec": { - "dockerImageRepository": "openshift/wildfly-8-centos", + "dockerImageRepository": "openshift/wildfly-81-centos7", "tags": [ { "name": "latest" }, { - "name": "8", + "name": "8.1", "annotations": { - "description": "Build and run Java applications on Wildfly 8", + "description": "Build and run Java applications on Wildfly 8.1", "iconClass": "icon-wildfly", "tags": "builder,wildfly,java", - "supports":"wildfly:8,jee,java", - "version": "8" + "supports":"wildfly:8.1,jee,java", + "version": "8.1" }, "from": { "Kind": "ImageStreamTag", @@ -260,13 +260,13 @@ "creationTimestamp": null }, "spec": { - "dockerImageRepository": "openshift/jenkins-16-centos7", + "dockerImageRepository": "openshift/jenkins-1-centos7", "tags": [ { "name": "latest" }, { - "name": "1.6", + "name": "1", "from": { "Kind": "ImageStreamTag", "Name": "latest" diff --git a/roles/openshift_examples/files/examples/image-streams/image-streams-rhel7.json b/roles/openshift_examples/files/examples/image-streams/image-streams-rhel7.json index 0bd885af3..8c125f76a 100644 --- a/roles/openshift_examples/files/examples/image-streams/image-streams-rhel7.json +++ b/roles/openshift_examples/files/examples/image-streams/image-streams-rhel7.json @@ -230,13 +230,13 @@ "creationTimestamp": null }, "spec": { - "dockerImageRepository": "registry.access.redhat.com/openshift3/jenkins-16-rhel7", + "dockerImageRepository": "registry.access.redhat.com/openshift3/jenkins-1-rhel7", "tags": [ { "name": "latest" }, { - "name": "1.6", + "name": "1", "from": { "Kind": "ImageStreamTag", "Name": "latest" diff --git a/roles/openshift_examples/files/examples/quickstart-templates/jenkins-ephemeral-template.json b/roles/openshift_examples/files/examples/quickstart-templates/jenkins-ephemeral-template.json index da08ffbd5..14bd032af 100644 --- a/roles/openshift_examples/files/examples/quickstart-templates/jenkins-ephemeral-template.json +++ b/roles/openshift_examples/files/examples/quickstart-templates/jenkins-ephemeral-template.json @@ -88,7 +88,7 @@ "containers": [ { "name": "jenkins", - "image": "openshift/jenkins-16-centos7", + "image": "${JENKINS_IMAGE}", "env": [ { "name": "JENKINS_PASSWORD", @@ -132,6 +132,11 @@ "description": "Jenkins service name", "value": "jenkins" }, + { + "name": "JENKINS_IMAGE", + "description": "Jenkins Docker image to use", + "value": "openshift/jenkins-1-centos7" + }, { "name": "JENKINS_PASSWORD", "description": "Password for the Jenkins user", diff --git a/roles/openshift_examples/files/examples/quickstart-templates/jenkins-persistent-template.json b/roles/openshift_examples/files/examples/quickstart-templates/jenkins-persistent-template.json index 33df68c74..fa31de486 100644 --- a/roles/openshift_examples/files/examples/quickstart-templates/jenkins-persistent-template.json +++ b/roles/openshift_examples/files/examples/quickstart-templates/jenkins-persistent-template.json @@ -105,7 +105,7 @@ "containers": [ { "name": "jenkins", - "image": "openshift/jenkins-16-centos7", + "image": "${JENKINS_IMAGE}", "env": [ { "name": "JENKINS_PASSWORD", @@ -155,6 +155,11 @@ "generate": "expression", "value": "password" }, + { + "name": "JENKINS_IMAGE", + "description": "Jenkins Docker image to use", + "value": "openshift/jenkins-1-centos7" + }, { "name": "VOLUME_CAPACITY", "description": "Volume space available for data, e.g. 512Mi, 2Gi", diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 679c3273a..60d1226d4 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -454,6 +454,8 @@ def set_deployment_facts_if_unset(facts): dict: the facts dict updated with the generated deployment_type facts """ + # Perhaps re-factor this as a map? + # pylint: disable=too-many-branches if 'common' in facts: deployment_type = facts['common']['deployment_type'] if 'service_type' not in facts['common']: @@ -470,6 +472,13 @@ def set_deployment_facts_if_unset(facts): elif deployment_type == 'origin': config_base = '/etc/openshift' facts['common']['config_base'] = config_base + if 'data_dir' not in facts['common']: + data_dir = '/var/lib/origin' + if deployment_type in ['enterprise', 'online']: + data_dir = '/var/lib/openshift' + elif deployment_type == 'origin': + data_dir = '/var/lib/openshift' + facts['common']['data_dir'] = data_dir for role in ('master', 'node'): if role in facts: diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index fd3d20800..6301d4fc0 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -1,5 +1,5 @@ --- -- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 +- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 and not 1.9.0.1 assert: that: - ansible_version | version_compare('1.8.0', 'ge') diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml index 7c4f45ce6..94d7879b2 100644 --- a/roles/openshift_manage_node/tasks/main.yml +++ b/roles/openshift_manage_node/tasks/main.yml @@ -3,7 +3,7 @@ {{ openshift.common.client_binary }} get node {{ item }} register: omd_get_node until: omd_get_node.rc == 0 - retries: 10 + retries: 20 delay: 5 with_items: openshift_nodes diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index b57711b58..3394d13e5 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -8,6 +8,15 @@ - openshift_master_oauth_grant_method in openshift_master_valid_grant_methods when: openshift_master_oauth_grant_method is defined +- name: Displaying openshift_master_ha + debug: var=openshift_master_ha + +- name: openshift_master_cluster_password + debug: var=openshift_master_cluster_password + +- name: openshift.master.cluster_defer_ha + debug: var=openshift.master.cluster_defer_ha + - fail: msg: "openshift_master_cluster_password must be set for multi-master installations" when: openshift_master_ha | bool and not openshift.master.cluster_defer_ha | bool and openshift_master_cluster_password is not defined @@ -23,7 +32,7 @@ api_port: "{{ openshift_master_api_port | default(None) }}" api_url: "{{ openshift_master_api_url | default(None) }}" api_use_ssl: "{{ openshift_master_api_use_ssl | default(None) }}" - public_api_url: "{{ openshift_master_public_api_url | default(None) }}" + public_api_url: "{{ openshift_master_public_api_url | default('https://' ~ openshift.common.public_ip ~ ':8443') }}" console_path: "{{ openshift_master_console_path | default(None) }}" console_port: "{{ openshift_master_console_port | default(None) }}" console_url: "{{ openshift_master_console_url | default(None) }}" diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index 7aa4ce9b6..500690523 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -46,7 +46,7 @@ etcdConfig: certFile: etcd.server.crt clientCA: ca.crt keyFile: etcd.server.key - storageDirectory: {{ openshift_data_dir }}/openshift.local.etcd + storageDirectory: {{ openshift.common.data_dir }}/openshift.local.etcd {% endif %} etcdStorageConfig: kubernetesStoragePrefix: kubernetes.io diff --git a/roles/openshift_master/vars/main.yml b/roles/openshift_master/vars/main.yml index cce118cf1..ecdb4f883 100644 --- a/roles/openshift_master/vars/main.yml +++ b/roles/openshift_master/vars/main.yml @@ -3,7 +3,7 @@ openshift_master_config_dir: "{{ openshift.common.config_base }}/master" openshift_master_config_file: "{{ openshift_master_config_dir }}/master-config.yaml" openshift_master_scheduler_conf: "{{ openshift_master_config_dir }}/scheduler.json" openshift_master_policy: "{{ openshift_master_config_dir }}/policy.json" -openshift_version: "{{ openshift_version | default('') }}" +openshift_version: "{{ openshift_pkg_version | default('') }}" openshift_master_valid_grant_methods: - auto diff --git a/roles/openshift_master_ca/vars/main.yml b/roles/openshift_master_ca/vars/main.yml index 9e9561e02..b35339b18 100644 --- a/roles/openshift_master_ca/vars/main.yml +++ b/roles/openshift_master_ca/vars/main.yml @@ -3,4 +3,4 @@ openshift_master_config_dir: "{{ openshift.common.config_base }}/master" openshift_master_ca_cert: "{{ openshift_master_config_dir }}/ca.crt" openshift_master_ca_key: "{{ openshift_master_config_dir }}/ca.key" openshift_master_ca_serial: "{{ openshift_master_config_dir }}/ca.serial.txt" -openshift_version: "{{ openshift_version | default('') }}" +openshift_version: "{{ openshift_pkg_version | default('') }}" diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 1986b631e..b32c3a4b3 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -22,7 +22,7 @@ deployment_type: "{{ openshift_deployment_type }}" - role: node local_facts: - labels: "{{ openshift_node_labels | default(none) }}" + labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default() ) }}" annotations: "{{ openshift_node_annotations | default(none) }}" registry_url: "{{ oreg_url | default(none) }}" debug_level: "{{ openshift_node_debug_level | default(openshift.common.debug_level) }}" @@ -72,6 +72,12 @@ dest: /etc/sysconfig/docker regexp: '^OPTIONS=.*$' line: "OPTIONS='--insecure-registry={{ openshift.node.portal_net }} \ +--insecure-registry=dockerhub.rnd.amadeus.net:5000 \ +--insecure-registry=dockerhub.rnd.amadeus.net:5001 \ +--insecure-registry=dockerhub.rnd.amadeus.net:5002 \ +--add-registry=dockerhub.rnd.amadeus.net:5000 \ +--add-registry=dockerhub.rnd.amadeus.net:5001 \ +--add-registry=dockerhub.rnd.amadeus.net:5002 \ {% if ansible_selinux and ansible_selinux.status == '''enabled''' %}--selinux-enabled{% endif %}'" when: docker_check.stat.isreg notify: diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2 index 0f708fb55..07d80f99b 100644 --- a/roles/openshift_node/templates/node.yaml.v1.j2 +++ b/roles/openshift_node/templates/node.yaml.v1.j2 @@ -25,5 +25,5 @@ servingInfo: certFile: server.crt clientCA: ca.crt keyFile: server.key -volumeDirectory: {{ openshift_data_dir }}/openshift.local.volumes +volumeDirectory: {{ openshift.common.data_dir }}/openshift.local.volumes {% include 'partials/kubeletArguments.j2' %} diff --git a/roles/openshift_node/vars/main.yml b/roles/openshift_node/vars/main.yml index db2c3e15a..43dc50ca8 100644 --- a/roles/openshift_node/vars/main.yml +++ b/roles/openshift_node/vars/main.yml @@ -1,4 +1,4 @@ --- openshift_node_config_dir: "{{ openshift.common.config_base }}/node" openshift_node_config_file: "{{ openshift_node_config_dir }}/node-config.yaml" -openshift_version: "{{ openshift_version | default('') }}" +openshift_version: "{{ openshift_pkg_version | default('') }}" diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index a1cd3519e..395e054de 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -52,35 +52,35 @@ g_template_docker: - Docker Storage value_type: float ztriggers: - - description: 'docker.ping failed on {HOST.NAME}' + - name: 'docker.ping failed on {HOST.NAME}' expression: '{Template Docker:docker.ping.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' priority: high - - description: 'Docker storage is using LOOPBACK on {HOST.NAME}' + - name: 'Docker storage is using LOOPBACK on {HOST.NAME}' expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' priority: high - - description: 'Critically low docker storage data space on {HOST.NAME}' + - name: 'Critically low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high - - description: 'Critically low docker storage metadata space on {HOST.NAME}' + - name: 'Critically low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high # Put triggers that depend on other triggers here (deps must be created first) - - description: 'Low docker storage data space on {HOST.NAME}' + - name: 'Low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: - 'Critically low docker storage data space on {HOST.NAME}' priority: average - - description: 'Low docker storage metadata space on {HOST.NAME}' + - name: 'Low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml index 798377cd9..8dbe0d0d6 100644 --- a/roles/os_zabbix/vars/template_heartbeat.yml +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -7,7 +7,7 @@ g_template_heartbeat: - Heartbeat key: heartbeat.ping ztriggers: - - description: 'Heartbeat.ping has failed on {HOST.NAME}' + - name: 'Heartbeat.ping has failed on {HOST.NAME}' expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1' priority: avg url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index d2c1365b0..728423ac1 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -7,7 +7,7 @@ g_template_openshift_master: - Openshift Master key: create_app ztriggers: - - description: 'Application creation has failed on {HOST.NAME}' + - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 7c446cd85..3173c79b2 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -52,106 +52,135 @@ g_template_os_linux: - Kernel value_type: float - - key: mem.freemem + - key: kernel.all.cpu.nice applications: - - Memory + - Kernel value_type: int - - key: kernel.all.cpu.nice + - key: kernel.all.load.1_minute applications: - Kernel - value_type: int + value_type: float - - key: mem.util.bufmem + - key: kernel.uname.version applications: - - Memory - value_type: int + - Kernel + value_type: string - - key: swap.used + - key: kernel.all.uptime applications: - - Memory + - Kernel value_type: int - - key: kernel.all.load.1_minute + - key: kernel.all.cpu.user applications: - Kernel - value_type: float + value_type: int - - key: kernel.uname.version + - key: kernel.uname.machine applications: - Kernel value_type: string - - key: swap.length + - key: hinv.ncpu applications: - - Memory + - Kernel value_type: int - - key: mem.physmem + - key: kernel.all.cpu.steal applications: - - Memory + - Kernel value_type: int - - key: kernel.all.uptime + - key: kernel.all.pswitch applications: - Kernel value_type: int - - key: swap.free + - key: kernel.uname.release applications: - - Memory - value_type: int + - Kernel + value_type: string - - key: mem.util.available + - key: proc.nprocs applications: - - Memory + - Kernel value_type: int - - key: mem.util.used + # Memory Items + - key: mem.freemem applications: - Memory value_type: int + description: "PCP: free system memory metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.all.cpu.user + - key: mem.util.bufmem applications: - - Kernel + - Memory value_type: int + description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.uname.machine + - key: swap.used applications: - - Kernel - value_type: string + - Memory + value_type: int + description: "PCP: swap used metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: hinv.ncpu + - key: swap.length applications: - - Kernel + - Memory value_type: int + description: "PCP: total swap available metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: mem.util.cached + - key: mem.physmem applications: - Memory value_type: int + description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM." + multiplier: 1024 + units: B - - key: kernel.all.cpu.steal + - key: swap.free applications: - - Kernel + - Memory value_type: int + description: "PCP: swap free metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.all.pswitch + - key: mem.util.available applications: - - Kernel + - Memory value_type: int + description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo" + multiplier: 1024 + units: B - - key: kernel.uname.release + - key: mem.util.used applications: - - Kernel - value_type: string + - Memory + value_type: int + description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo" + multiplier: 1024 + units: B - - key: proc.nprocs + - key: mem.util.cached applications: - - Kernel + - Memory value_type: int + description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo" + multiplier: 1024 + units: B + # Disk items - key: filesys.full.xvda2 applications: - Disk @@ -163,32 +192,33 @@ g_template_os_linux: value_type: float ztriggers: - - description: 'Filesystem: / has less than 10% free on {HOST.NAME}' + - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - description: 'Filesystem: / has less than 5% free on {HOST.NAME}' + - name: 'Filesystem: / has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}' + - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' + - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - - description: 'Too many TOTAL processes on {HOST.NAME}' + - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' priority: warn - - description: 'Lack of available memory on {HOST.NAME}' - expression: '{Template OS Linux:mem.freemem.last()}<3000' + - name: 'Lack of available memory on {HOST.NAME}' + expression: '{Template OS Linux:mem.freemem.last()}<30720000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn + description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024' -- cgit v1.2.3