diff options
19 files changed, 162 insertions, 73 deletions
diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible index e6a0b6e80..a9b7ba843 100644 --- a/.tito/packages/openshift-ansible +++ b/.tito/packages/openshift-ansible @@ -1 +1 @@ -3.0.58-1 ./ +3.0.61-1 ./ diff --git a/openshift-ansible.spec b/openshift-ansible.spec index bad7e68b2..f01e13f6e 100644 --- a/openshift-ansible.spec +++ b/openshift-ansible.spec @@ -5,7 +5,7 @@  }  Name:           openshift-ansible -Version:        3.0.58 +Version:        3.0.61  Release:        1%{?dist}  Summary:        Openshift and Atomic Enterprise Ansible  License:        ASL 2.0 @@ -279,6 +279,31 @@ Atomic OpenShift Utilities includes  %changelog +* Thu Mar 17 2016 Troy Dawson <tdawson@redhat.com> 3.0.61-1 +- Bug 1317755 - Set insecure-registry for internal registry by default +  (jdetiber@redhat.com) + +* Wed Mar 16 2016 Brenton Leanhardt <bleanhar@redhat.com> 3.0.60-1 +- Fall back to deployment_type in openshift_facts. (abutcher@redhat.com) +- Fixing undefined variable check (kwoodson@redhat.com) +- Fix path to cacert on /healthz/ready check (sdodson@redhat.com) +- Load environment files in containerized installs (sdodson@redhat.com) +- change type to value_type (zhizhang@zhizhang-laptop-nay.redhat.com) +- change time from int to float (zhizhang@zhizhang-laptop-nay.redhat.com) +- change the check time from 1 hour to 2 hour (zhizhang@zhizhang-laptop- +  nay.redhat.com) +- add item of time cost a app build and app create (zhizhang@zhizhang-laptop- +  nay.redhat.com) +- add trigger for app creation with build process (zhizhang@zhizhang-laptop- +  nay.redhat.com) +- add key of openshift.master.app.build.create (zhizhang@zhizhang-laptop- +  nay.redhat.com) + +* Wed Mar 16 2016 Brenton Leanhardt <bleanhar@redhat.com> 3.0.59-1 +- Only mask etcd service for containerized installls when it's installed +  (sdodson@redhat.com) +- Provide cacert when performing health checks (abutcher@redhat.com) +  * Tue Mar 15 2016 Kenny Woodson <kwoodson@redhat.com> 3.0.58-1  - Group selector feature added (kwoodson@redhat.com)  - nfs: replace yum with dnf (efreiber@redhat.com) diff --git a/playbooks/common/openshift-master/scaleup.yml b/playbooks/common/openshift-master/scaleup.yml index ccb1d23f1..6e6cb3e01 100644 --- a/playbooks/common/openshift-master/scaleup.yml +++ b/playbooks/common/openshift-master/scaleup.yml @@ -33,7 +33,8 @@      service: name={{ openshift.common.service_type }}-master-controllers state=restarted    - name: verify api server      command: > -      curl -k --silent {{ openshift.master.api_url }}/healthz/ready +      curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt +      {{ openshift.master.api_url }}/healthz/ready      register: api_available_output      until: api_available_output.stdout == 'ok'      retries: 120 diff --git a/playbooks/common/openshift-node/config.yml b/playbooks/common/openshift-node/config.yml index 1f32f2786..7ca941732 100644 --- a/playbooks/common/openshift-node/config.yml +++ b/playbooks/common/openshift-node/config.yml @@ -115,6 +115,11 @@    vars:      openshift_node_master_api_url: "{{ hostvars[groups.oo_first_master.0].openshift.master.api_url }}"      openshift_node_first_master_ip: "{{ hostvars[groups.oo_first_master.0].openshift.common.ip }}" +    # TODO: configure these based on +    # hostvars[groups.oo_first_master.0].openshift.hosted.registry instead of +    # hardcoding +    openshift_docker_hosted_registry_insecure: True +    openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.master.portal_net }}"    roles:    - openshift_node @@ -123,6 +128,11 @@    vars:      openshift_node_master_api_url: "{{ hostvars[groups.oo_first_master.0].openshift.master.api_url }}"      openshift_node_first_master_ip: "{{ hostvars[groups.oo_first_master.0].openshift.common.ip }}" +    # TODO: configure these based on +    # hostvars[groups.oo_first_master.0].openshift.hosted.registry instead of +    # hardcoding +    openshift_docker_hosted_registry_insecure: True +    openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.master.portal_net }}"    roles:    - openshift_node @@ -256,7 +266,8 @@      # Using curl here since the uri module requires python-httplib2 and      # wait_for port doesn't provide health information.      command: > -      curl -k --silent {{ openshift.master.api_url }}/healthz/ready +      curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt +      {{ openshift.master.api_url }}/healthz/ready      register: api_available_output      until: api_available_output.stdout == 'ok'      retries: 120 diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml index 9f827417f..aff905bc8 100644 --- a/roles/docker/handlers/main.yml +++ b/roles/docker/handlers/main.yml @@ -4,7 +4,7 @@    service:      name: docker      state: restarted -  when: not docker_service_status_changed | default(false) +  when: not docker_service_status_changed | default(false) | bool  - name: restart udev    service: diff --git a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml index 506cecfea..9709c5014 100644 --- a/roles/docker/tasks/main.yml +++ b/roles/docker/tasks/main.yml @@ -49,16 +49,16 @@      dest: /etc/sysconfig/docker      regexp: '^{{ item.reg_conf_var }}=.*$'      line: "{{ item.reg_conf_var }}='{{ item.reg_fact_val | oo_prepend_strings_in_list(item.reg_flag ~ ' ') | join(' ') }}'" -  when: item.reg_fact_val is defined and docker_check.stat.isreg +  when: item.reg_fact_val != '' and  docker_check.stat.isreg    with_items:    - reg_conf_var: ADD_REGISTRY -    reg_fact_val: "{{ docker_additional_registries }}" +    reg_fact_val: "{{ docker_additional_registries | default(None, true)}}"      reg_flag: --add-registry    - reg_conf_var: BLOCK_REGISTRY -    reg_fact_val: "{{ docker_blocked_registries }}" +    reg_fact_val: "{{ docker_blocked_registries| default(None, true) }}"      reg_flag: --block-registry    - reg_conf_var: INSECURE_REGISTRY -    reg_fact_val: "{{ docker_insecure_registries }}" +    reg_fact_val: "{{ docker_insecure_registries| default(None, true) }}"      reg_flag: --insecure-registry    notify:    - restart docker @@ -76,3 +76,5 @@    when: docker_check.stat.isreg    notify:      - restart docker + +- meta: flush_handlers diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index d6956de71..064544b03 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -36,8 +36,12 @@      state: stopped      enabled: no +- name: Check for etcd service presence +  command: systemctl show etcd.service +  register: etcd_show +    - name: Mask system etcd when containerized -  when: openshift.common.is_containerized | bool +  when: openshift.common.is_containerized | bool and 'LoadState=not-found' not in etcd_show.stdout    command: systemctl mask etcd  - name: Reload systemd units diff --git a/roles/openshift_docker_facts/tasks/main.yml b/roles/openshift_docker_facts/tasks/main.yml index ad7ad3748..26b46aa94 100644 --- a/roles/openshift_docker_facts/tasks/main.yml +++ b/roles/openshift_docker_facts/tasks/main.yml @@ -13,11 +13,9 @@        log_options: "{{ openshift_docker_log_options | default(None) }}"        options: "{{ openshift_docker_options | default(None) }}"        disable_push_dockerhub: "{{ openshift_disable_push_dockerhub | default(None) }}" -  - role: node -    local_facts: -      portal_net: "{{ openshift_master_portal_net | default(None) }}" +      hosted_registry_insecure: "{{ openshift_docker_hosted_registry_insecure | default(None) }}" +      hosted_registry_network: "{{ openshift_docker_hosted_registry_network | default(None) }}" -# TODO: append openshift.node.portal_net to docker_insecure_registries  - set_fact:      docker_additional_registries: "{{ openshift.docker.additional_registries                                        | default(omit) }}" @@ -27,6 +25,15 @@                                      | default(omit) }}"      docker_log_driver: "{{ openshift.docker.log_driver | default(omit) }}"      docker_log_options: "{{ openshift.docker.log_options | default(omit) }}" -    docker_options: "{{ openshift.docker.options | default(omit) }}"      docker_push_dockerhub: "{{ openshift.docker.disable_push_dockerhub                                 | default(omit) }}" + +- set_fact: +    docker_options: > +      --insecure-registry={{ openshift.docker.hosted_registry_network }} +      {{ openshift.docker.options | default ('') }} +  when: openshift.docker.hosted_registry_insecure | default(False) | bool + +- set_fact: +    docker_options: "{{ openshift.docker.options | default(omit) }}" +  when: not openshift.docker.hosted_registry_insecure | default(False) | bool diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index b06900681..263daf210 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -50,6 +50,10 @@ def migrate_docker_facts(facts):                  old_param = 'docker_' + param                  if old_param in facts[role]:                      facts['docker'][param] = facts[role].pop(old_param) + +    if 'node' in facts and 'portal_net' in facts['node']: +        facts['docker']['hosted_registry_insecure'] = True +        facts['docker']['hosted_registry_network'] = facts['node'].pop('portal_net')      return facts  def migrate_local_facts(facts): @@ -1402,7 +1406,6 @@ class OpenShiftFacts(object):          if 'node' in roles:              defaults['node'] = dict(labels={}, annotations={}, -                                    portal_net='172.30.0.0/16',                                      iptables_sync_period='5s',                                      set_node_ip=False) diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index 89ddb0d98..50e7e5747 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -25,7 +25,8 @@    openshift_facts:      role: common      local_facts: -      deployment_type: "{{ openshift_deployment_type }}" +      # TODO: Deprecate deployment_type in favor of openshift_deployment_type +      deployment_type: "{{ openshift_deployment_type | default(deployment_type) }}"        cluster_id: "{{ openshift_cluster_id | default('default') }}"        hostname: "{{ openshift_hostname | default(None) }}"        ip: "{{ openshift_ip | default(None) }}" diff --git a/roles/openshift_master/handlers/main.yml b/roles/openshift_master/handlers/main.yml index 4d1216aae..e5b9e4977 100644 --- a/roles/openshift_master/handlers/main.yml +++ b/roles/openshift_master/handlers/main.yml @@ -17,7 +17,8 @@    # Using curl here since the uri module requires python-httplib2 and    # wait_for port doesn't provide health information.    command: > -    curl -k --silent {{ openshift.master.api_url }}/healthz/ready +    curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt +    {{ openshift.master.api_url }}/healthz/ready    register: api_available_output    until: api_available_output.stdout == 'ok'    retries: 120 diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 1f499dc93..9c3d09d09 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -282,7 +282,8 @@    # Using curl here since the uri module requires python-httplib2 and    # wait_for port doesn't provide health information.    command: > -    curl -k --silent {{ openshift.master.api_url }}/healthz/ready +    curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt +    {{ openshift.master.api_url }}/healthz/ready    register: api_available_output    until: api_available_output.stdout == 'ok'    retries: 120 diff --git a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2 b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2 index f777f7657..6a21a04ab 100644 --- a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2 +++ b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2 @@ -12,7 +12,7 @@ Requires=docker.service  EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master-api  Environment=GOTRACEBACK=crash  ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type}}-master-api -ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-api -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master api --config=${CONFIG_FILE} $OPTIONS +ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-api --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master-api -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master api --config=${CONFIG_FILE} $OPTIONS  ExecStartPost=/usr/bin/sleep 10  ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master-api  LimitNOFILE=131072 diff --git a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2 b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2 index 4fc48ba15..69f68d843 100644 --- a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2 +++ b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2 @@ -11,7 +11,7 @@ PartOf=docker.service  EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master-controllers  Environment=GOTRACEBACK=crash  ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type}}-master-controllers -ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-controllers -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master controllers --config=${CONFIG_FILE} $OPTIONS +ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-controllers --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master-controllers -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master controllers --config=${CONFIG_FILE} $OPTIONS  ExecStartPost=/usr/bin/sleep 10  ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master-controllers  LimitNOFILE=131072 diff --git a/roles/openshift_master/templates/docker/master.docker.service.j2 b/roles/openshift_master/templates/docker/master.docker.service.j2 index e9f4a4d21..b714fdeb7 100644 --- a/roles/openshift_master/templates/docker/master.docker.service.j2 +++ b/roles/openshift_master/templates/docker/master.docker.service.j2 @@ -8,7 +8,7 @@ Wants=etcd_container.service  [Service]  EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master  ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-master -ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master --config=${CONFIG_FILE} $OPTIONS +ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master --config=${CONFIG_FILE} $OPTIONS  ExecStartPost=/usr/bin/sleep 10  ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master  Restart=always diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 8768d426b..4b5832ab7 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -23,7 +23,6 @@        iptables_sync_period: "{{ openshift_node_iptables_sync_period | default(None) }}"        kubelet_args: "{{ openshift_node_kubelet_args | default(None) }}"        labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default(none), true) }}" -      portal_net: "{{ openshift_master_portal_net | default(None) }}"        registry_url: "{{ oreg_url | default(none) }}"        schedulable: "{{ openshift_schedulable | default(openshift_scheduleable) | default(None) }}"        sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}" @@ -126,7 +125,8 @@    # Using curl here since the uri module requires python-httplib2 and    # wait_for port doesn't provide health information.    command: > -    curl -k --silent {{ openshift_node_master_api_url }}/healthz/ready +    curl --silent --cacert {{ openshift.common.config_base }}/node/ca.crt +    {{ openshift_node_master_api_url }}/healthz/ready    register: api_available_output    until: api_available_output.stdout == 'ok'    retries: 120 diff --git a/roles/openshift_node/templates/openshift.docker.node.service b/roles/openshift_node/templates/openshift.docker.node.service index fa14cd770..53b1d6230 100644 --- a/roles/openshift_node/templates/openshift.docker.node.service +++ b/roles/openshift_node/templates/openshift.docker.node.service @@ -12,7 +12,7 @@ Wants={{ openshift.common.service_type }}-master.service  [Service]  EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-node  ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-node -ExecStart=/usr/bin/docker run --name {{ openshift.common.service_type }}-node --rm --privileged --net=host --pid=host -v /:/rootfs:ro -e CONFIG_FILE=${CONFIG_FILE} -e OPTIONS=${OPTIONS} -e HOST=/rootfs -e HOST_ETC=/host-etc -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v {{ openshift.common.config_base }}/node:{{ openshift.common.config_base }}/node -v /etc/localtime:/etc/localtime:ro -v /etc/machine-id:/etc/machine-id:ro -v /run:/run -v /sys:/sys:ro -v /usr/bin/docker:/usr/bin/docker:ro -v /var/lib/docker:/var/lib/docker -v /lib/modules:/lib/modules -v /etc/origin/openvswitch:/etc/openvswitch -v /etc/origin/sdn:/etc/openshift-sdn  -v /etc/systemd/system:/host-etc/systemd/system -v /var/log:/var/log {{ openshift.node.node_image }}:${IMAGE_VERSION} +ExecStart=/usr/bin/docker run --name {{ openshift.common.service_type }}-node --rm --privileged --net=host --pid=host --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-node -v /:/rootfs:ro -e CONFIG_FILE=${CONFIG_FILE} -e OPTIONS=${OPTIONS} -e HOST=/rootfs -e HOST_ETC=/host-etc -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v {{ openshift.common.config_base }}/node:{{ openshift.common.config_base }}/node -v /etc/localtime:/etc/localtime:ro -v /etc/machine-id:/etc/machine-id:ro -v /run:/run -v /sys:/sys:ro -v /usr/bin/docker:/usr/bin/docker:ro -v /var/lib/docker:/var/lib/docker -v /lib/modules:/lib/modules -v /etc/origin/openvswitch:/etc/openvswitch -v /etc/origin/sdn:/etc/openshift-sdn  -v /etc/systemd/system:/host-etc/systemd/system -v /var/log:/var/log {{ openshift.node.node_image }}:${IMAGE_VERSION}  ExecStartPost=/usr/bin/sleep 10  ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-node  SyslogIdentifier={{ openshift.common.service_type }}-node diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index e36f23a2b..705066b35 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -6,258 +6,276 @@ g_template_openshift_master:      applications:      - Openshift Master      key: openshift.master.app.create +   +  - key: openshift.master.app.build.create +    description: "check the app create with a build process" +    value_type: int +    applications: +    - Openshift Master + +  - key: openshift.master.app.create.time +    description: "check the time it takes app create with a build process" +    value_type: float +    applications: +    - Openshift Master + +  - key: openshift.master.app.build.time +    description: "check the time it takes app build" +    value_type: float +    applications: +    - Openshift Master    - key: openshift.master.process.count      description: Shows number of master processes running -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.api.ping      description: "Verify that the Openshift API is up (uses the cluster API URL)" -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.local.api.ping      description: "Verify that the Openshift API is up on the host (uses the API URL as the https://127.0.0.1)" -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.api.healthz      description: "Checks the healthz check of the master's api: https://<cluster_api_url>/healthz" -    type: int +    value_type: int      data_type: bool      applications:      - Openshift Master    - key: openshift.master.local.api.healthz      description: "Checks the healthz check of the master's api: https://127.0.0.1/healthz" -    type: int +    value_type: int      data_type: bool      applications:      - Openshift Master    - key: openshift.master.user.count      description: Shows number of users in a cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pod.running.count      description: Shows number of pods running -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pod.user.running.count      description: Shows number of user pods running (non infrastructure pods) -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pod.total.count      description: Shows total number of pods (running and non running) -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.node.count      description: Shows the total number of nodes found in the Openshift Cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.project.count      description: Shows number of projects on a cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pv.total.count      description: Total number of Persistent Volumes in the Openshift Cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pv.available.count      description: Total number of Available Persistent Volumes in the Openshift Cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pv.released.count      description: Total number of Released Persistent Volumes in the Openshift Cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pv.bound.count      description: Total number of Bound Persistent Volumes in the Openshift Cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.pv.failed.count      description: Total number of Failed Persistent Volumes in the Openshift Cluster -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.skydns.port.open      description: State of the SkyDNS port open and listening -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.skydns.query      description: SkyDNS can be queried or not -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.etcd.create.success      description: Show number of successful create actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.create.fail      description: Show number of failed create actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.delete.success      description: Show number of successful delete actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.delete.fail      description: Show number of failed delete actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.get.success      description: Show number of successful get actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.get.fail      description: Show number of failed get actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.set.success      description: Show number of successful set actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.set.fail      description: Show number of failed set actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.update.success      description: Show number of successful update actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.update.fail      description: Show number of failed update actions -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.watchers      description: Show number of etcd watchers -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.etcd.ping      description: etcd ping -    type: int +    value_type: int      applications:      - Openshift Etcd    - key: openshift.master.metric.ping      description: "This check verifies that the https://master/metrics check is alive and communicating properly." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.nodesnotready.count      description: "This check shows how many nodes in a cluster are in NotReady state." -    type: int +    value_type: int      applications:       - Openshift Master    - key: openshift.master.nodesnotschedulable.count      description: "This check shows how many nodes in a cluster are not schedulable." -    type: int +    value_type: int      applications:      - Openshift Master    - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 50% of the pod operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 90% of the pod operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 99% of the pod operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 50% of the pod operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 90% of the pod operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 99% of the pod operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics    - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99      description: "Value from https://master/metrics.  This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed." -    type: int +    value_type: int      applications:      - Openshift Master Metrics @@ -295,6 +313,13 @@ g_template_openshift_master:      - 'Openshift Master process not running on {HOST.NAME}'      priority: avg +  - name: 'Application creation with build has failed on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.app.build.create.last(#1)}=1 and {Template Openshift Master:openshift.master.app.build.create.last(#2)}=1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    priority: avg +    - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.app.create.sum(1h)}>3'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' @@ -303,6 +328,14 @@ g_template_openshift_master:      description: The application create loop has failed 4 or more times in the last hour      priority: avg +  - name: 'Application with build creation has failed multiple times in the last 2 hour on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.app.build.create.sum(2h)}>3' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    description: The application create loop has failed 4 or more times in the last hour +    priority: avg +    - name: 'Openshift Master API health check is failing on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml index 66bd3a147..9f84a2cdf 100644 --- a/roles/os_zabbix/vars/template_openshift_node.yml +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -4,37 +4,37 @@ g_template_openshift_node:    zitems:    - key: openshift.node.process.count      description: Shows number of OpenShift Node processes running -    type: int +    value_type: int      applications:      - Openshift Node    - key: openshift.node.ovs.pids.count      description: Shows number of ovs process ids running -    type: int +    value_type: int      applications:      - Openshift Node    - key: openshift.node.ovs.ports.count      description: Shows number of OVS ports defined -    type: int +    value_type: int      applications:      - Openshift Node    - key: openshift.node.ovs.stray.rules      description: Number of OVS stray rules found/removed -    type: int +    value_type: int      applications:      - Openshift Node    - key: openshift.node.registry-pods.healthy_pct      description: Shows the percentage of healthy registries in the cluster -    type: int +    value_type: int      applications:      - Openshift Node    - key: openshift.node.registry.service.ping      description: Ping docker-registry service from node -    type: int +    value_type: int      applications:      - Openshift Node  | 
