summaryrefslogtreecommitdiffstats
path: root/roles/openshift_prometheus
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_prometheus')
-rw-r--r--roles/openshift_prometheus/README.md118
-rw-r--r--roles/openshift_prometheus/defaults/main.yaml60
-rw-r--r--roles/openshift_prometheus/meta/main.yaml19
-rw-r--r--roles/openshift_prometheus/tasks/install_prometheus.yaml238
-rw-r--r--roles/openshift_prometheus/tasks/main.yaml26
-rw-r--r--roles/openshift_prometheus/templates/alertmanager.yml.j220
-rw-r--r--roles/openshift_prometheus/templates/prometheus.j2247
-rw-r--r--roles/openshift_prometheus/templates/prometheus.rules.j24
-rw-r--r--roles/openshift_prometheus/templates/prometheus.yml.j2174
-rw-r--r--roles/openshift_prometheus/tests/inventory2
-rw-r--r--roles/openshift_prometheus/tests/test.yaml5
11 files changed, 913 insertions, 0 deletions
diff --git a/roles/openshift_prometheus/README.md b/roles/openshift_prometheus/README.md
new file mode 100644
index 000000000..92f74928c
--- /dev/null
+++ b/roles/openshift_prometheus/README.md
@@ -0,0 +1,118 @@
+OpenShift Prometheus
+====================
+
+OpenShift Prometheus Installation
+
+Requirements
+------------
+
+
+Role Variables
+--------------
+
+For default values, see [`defaults/main.yaml`](defaults/main.yaml).
+
+- `openshift_prometheus_state`: present - install/update. absent - uninstall.
+
+- `openshift_prometheus_namespace`: project (i.e. namespace) where the components will be
+ deployed.
+
+- `openshift_prometheus_node_selector`: Selector for the nodes prometheus will be deployed on.
+
+- `openshift_prometheus_<COMPONENT>_image_prefix`: specify image prefix for the component
+
+- `openshift_prometheus_<COMPONENT>_image_version`: specify image version for the component
+
+## PVC related variables
+Each Prometheus component (prometheus, alertmanager, alertbuffer) can request a persistent volume claim (PVC) by setting the corresponding role variables:
+```
+openshift_prometheus_<COMPONENT>_storage_type: <VALUE> (pvc, emptydir)
+openshift_prometheus_<COMPONENT>_pvc_(name|size|access_modes|pv_selector): <VALUE>
+```
+e.g
+```
+openshift_prometheus_storage_type: pvc
+openshift_prometheus_alertmanager_pvc_name: alertmanager
+openshift_prometheus_alertbuffer_pvc_size: 10G
+openshift_prometheus_pvc_access_modes: [ReadWriteOnce]
+```
+
+## NFS PV Storage variables
+Each Prometheus component (prometheus, alertmanager, alertbuffer) can use an NFS persistent volume by setting the corresponding variables:
+```
+openshift_prometheus_<COMPONENT>_storage_kind=<VALUE>
+openshift_prometheus_<COMPONENT>_storage_(access_modes|host|labels)=<VALUE>
+openshift_prometheus_<COMPONENT>_storage_volume_(name|size)=<VALUE>
+openshift_prometheus_<COMPONENT>_storage_nfs_(directory|options)=<VALUE>
+```
+e.g
+```
+openshift_prometheus_storage_kind=nfs
+openshift_prometheus_storage_access_modes=['ReadWriteOnce']
+openshift_prometheus_storage_host=nfs.example.com #for external host
+openshift_prometheus_storage_nfs_directory=/exports
+openshift_prometheus_alertmanager_storage_nfs_options='*(rw,root_squash)'
+openshift_prometheus_storage_volume_name=prometheus
+openshift_prometheus_alertbuffer_storage_volume_size=10Gi
+openshift_prometheus_storage_labels={'storage': 'prometheus'}
+```
+
+NOTE: Setting `openshift_prometheus_<COMPONENT>_storage_labels` overrides `openshift_prometheus_<COMPONENT>_pvc_pv_selector`
+
+
+## Additional Alert Rules file variable
+An external file with alert rules can be added by setting the additional rules variable to the path of that file:
+```
+openshift_prometheus_additional_rules_file: <PATH>
+```
+
+The file content must be in the Prometheus alert rules format.
+The following example defines a rule that fires an alert when one of the cluster nodes is down:
+
+```
+groups:
+- name: example-rules
+ interval: 30s # defaults to global interval
+ rules:
+ - alert: Node Down
+ expr: up{job="kubernetes-nodes"} == 0
+ annotations:
+ miqTarget: "ContainerNode"
+ severity: "HIGH"
+ message: "{{ '{{' }}{{ '$labels.instance' }}{{ '}}' }} is down"
+```
+
+
+## Additional variables to control resource limits
+Each prometheus component (prometheus, alertmanager, alertbuffer, oauth_proxy) can specify CPU and memory limits and requests by setting
+the corresponding role variable:
+```
+openshift_prometheus_<COMPONENT>_(memory|cpu)_(limit|requests): <VALUE>
+```
+e.g.
+```
+openshift_prometheus_alertmanager_memory_limit: 1Gi
+openshift_prometheus_oauth_proxy_cpu_requests: 100
+```
+
+Dependencies
+------------
+
+openshift_facts
+
+
+Example Playbook
+----------------
+
+```
+- name: Configure openshift-prometheus
+ hosts: oo_first_master
+ roles:
+ - role: openshift_prometheus
+```
+
+License
+-------
+
+Apache License, Version 2.0
+
diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml
new file mode 100644
index 000000000..00995eee6
--- /dev/null
+++ b/roles/openshift_prometheus/defaults/main.yaml
@@ -0,0 +1,60 @@
+---
+# Default variables for the openshift_prometheus role.
+
+# 'present' installs or updates the deployment, 'absent' uninstalls it.
+openshift_prometheus_state: present
+
+# Project (namespace) the components are deployed into.
+openshift_prometheus_namespace: prometheus
+
+# Node labels selecting where prometheus is deployed.
+openshift_prometheus_node_selector:
+  region: infra
+
+# Image prefix and version for each component.
+openshift_prometheus_image_prefix: "openshift/"
+openshift_prometheus_image_version: "v2.0.0-dev.3"
+openshift_prometheus_proxy_image_prefix: "openshift/"
+openshift_prometheus_proxy_image_version: "v1.0.0"
+openshift_prometheus_alertmanager_image_prefix: "openshift/"
+openshift_prometheus_alertmanager_image_version: "v0.9.1"
+openshift_prometheus_alertbuffer_image_prefix: "openshift/"
+openshift_prometheus_alertbuffer_image_version: "v0.0.2"
+
+# Optional path to an extra alert rules file; null means none is copied.
+openshift_prometheus_additional_rules_file: null
+
+# storage
+# Each component picks its own backend: 'emptydir' or 'pvc'. The pvc_* values
+# are only used by the install tasks when the matching storage type is 'pvc'.
+
+# prometheus
+openshift_prometheus_storage_type: "emptydir"
+openshift_prometheus_pvc_name: prometheus
+openshift_prometheus_pvc_size: "{{ openshift_prometheus_storage_volume_size | default('10Gi') }}"
+openshift_prometheus_pvc_access_modes:
+  - ReadWriteOnce
+openshift_prometheus_pvc_pv_selector: "{{ openshift_prometheus_storage_labels | default({}) }}"
+
+# alertmanager
+openshift_prometheus_alertmanager_storage_type: "emptydir"
+openshift_prometheus_alertmanager_pvc_name: prometheus-alertmanager
+openshift_prometheus_alertmanager_pvc_size: "{{ openshift_prometheus_alertmanager_storage_volume_size | default('10Gi') }}"
+openshift_prometheus_alertmanager_pvc_access_modes:
+  - ReadWriteOnce
+openshift_prometheus_alertmanager_pvc_pv_selector: "{{ openshift_prometheus_alertmanager_storage_labels | default({}) }}"
+
+# alertbuffer
+openshift_prometheus_alertbuffer_storage_type: "emptydir"
+openshift_prometheus_alertbuffer_pvc_name: prometheus-alertbuffer
+openshift_prometheus_alertbuffer_pvc_size: "{{ openshift_prometheus_alertbuffer_storage_volume_size | default('10Gi') }}"
+openshift_prometheus_alertbuffer_pvc_access_modes:
+  - ReadWriteOnce
+openshift_prometheus_alertbuffer_pvc_pv_selector: "{{ openshift_prometheus_alertbuffer_storage_labels | default({}) }}"
+
+# container resources
+# CPU/memory requests and limits for each container. A null value leaves the
+# corresponding field out of the rendered StatefulSet, because the template
+# only emits entries whose variable is defined and not none.
+openshift_prometheus_cpu_limit: null
+openshift_prometheus_memory_limit: null
+openshift_prometheus_cpu_requests: null
+openshift_prometheus_memory_requests: null
+openshift_prometheus_alertmanager_cpu_limit: null
+openshift_prometheus_alertmanager_memory_limit: null
+openshift_prometheus_alertmanager_cpu_requests: null
+openshift_prometheus_alertmanager_memory_requests: null
+openshift_prometheus_alertbuffer_cpu_limit: null
+openshift_prometheus_alertbuffer_memory_limit: null
+openshift_prometheus_alertbuffer_cpu_requests: null
+openshift_prometheus_alertbuffer_memory_requests: null
+openshift_prometheus_oauth_proxy_cpu_limit: null
+openshift_prometheus_oauth_proxy_memory_limit: null
+openshift_prometheus_oauth_proxy_cpu_requests: null
+openshift_prometheus_oauth_proxy_memory_requests: null
diff --git a/roles/openshift_prometheus/meta/main.yaml b/roles/openshift_prometheus/meta/main.yaml
new file mode 100644
index 000000000..33188bb7e
--- /dev/null
+++ b/roles/openshift_prometheus/meta/main.yaml
@@ -0,0 +1,19 @@
+---
+# Ansible Galaxy metadata and role dependencies for openshift_prometheus.
+galaxy_info:
+  author: OpenShift Development <dev@lists.openshift.redhat.com>
+  description: Deploy OpenShift prometheus integration for the cluster
+  company: Red Hat, Inc.
+  license: license (Apache)
+  # Quoted so the version stays a string instead of being parsed as a float
+  # (an unquoted 2.10 would silently become 2.1).
+  min_ansible_version: "2.2"
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  - name: Fedora
+    versions:
+    - all
+  categories:
+  - openshift
+dependencies:
+- role: lib_openshift
+- role: openshift_facts
diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml
new file mode 100644
index 000000000..00c3c1987
--- /dev/null
+++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml
@@ -0,0 +1,238 @@
+---
+
+# Project (namespace) that hosts every prometheus component
+- name: Add prometheus project
+  oc_project:
+    name: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+    description: Prometheus
+    node_selector: "{{ openshift_prometheus_node_selector | oo_selector_to_string_list() }}"
+
+# One "<item>-proxy" secret per proxy, each holding a random session_secret
+- name: Set alert and prometheus secrets
+  oc_secret:
+    name: "{{ item }}-proxy"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+    contents:
+    - path: session_secret
+      data: "{{ 43 | oo_random_word }}="
+  with_items:
+  - prometheus
+  - alerts
+
+# serviceaccount
+# Service account the prometheus pod runs as (see serviceAccountName in the
+# StatefulSet template).
+- name: create prometheus serviceaccount
+  oc_serviceaccount:
+    state: "{{ state }}"
+    name: prometheus
+    namespace: "{{ openshift_prometheus_namespace }}"
+    # TODO add annotations when supported
+    # annotations:
+    #   serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
+    #   serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
+
+    secrets:
+    - prometheus-secrets
+  # Canonical YAML boolean; this task never reports "changed".
+  changed_when: false
+
+# Adds the OAuth redirect references for the prometheus and alerts routes with
+# the client binary.
+# TODO remove this when annotations are supported by oc_serviceaccount
+- name: annotate serviceaccount
+  command: >
+    {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
+    serviceaccount prometheus
+    serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
+    serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
+
+
+# Bind the cluster-reader cluster role to the prometheus serviceaccount
+- name: Set cluster-reader permissions for prometheus
+  oc_adm_policy_user:
+    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus"
+    resource_kind: cluster-role
+    resource_name: cluster-reader
+    namespace: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+
+# Services in front of the two oauth proxies: both listen on 443 and differ
+# only in name and target port (8443 for prometheus, 9443 for alerts).
+# TODO join into 1 task with loop
+- name: Create prometheus service
+  oc_service:
+    name: "{{ item.name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+    labels:
+      name: "{{ item.name }}"
+    selector:
+      app: prometheus
+    # TODO add annotations when supported
+    # annotations:
+    #   service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+    ports:
+    - port: 443
+      targetPort: 8443
+  with_items:
+  - name: prometheus
+
+- name: Create alerts service
+  oc_service:
+    name: "{{ item.name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+    labels:
+      name: "{{ item.name }}"
+    selector:
+      app: prometheus
+    # TODO add annotations when supported
+    # annotations:
+    #   service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+    ports:
+    - port: 443
+      targetPort: 9443
+  with_items:
+  - name: alerts
+
+
+# Set the scrape and serving-cert annotations on both services with the client
+# binary.
+# TODO remove this when annotations are supported by oc_service
+- name: annotate prometheus service
+  command: >
+    {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
+    service prometheus
+    prometheus.io/scrape='true'
+    prometheus.io/scheme=https
+    service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls
+
+- name: annotate alerts service
+  command: >
+    {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
+    service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls'
+
+# One reencrypt route per service, named after the service it points at
+- name: create prometheus and alerts routes
+  oc_route:
+    name: "{{ item.name }}"
+    service_name: "{{ item.name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+    tls_termination: reencrypt
+  with_items:
+  - name: prometheus
+  - name: alerts
+
+# Storage
+# A claim is created only for components whose storage type is 'pvc'.
+- name: create prometheus pvc
+  oc_pvc:
+    name: "{{ openshift_prometheus_pvc_name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    volume_capacity: "{{ openshift_prometheus_pvc_size }}"
+    access_modes: "{{ openshift_prometheus_pvc_access_modes }}"
+    selector: "{{ openshift_prometheus_pvc_pv_selector }}"
+  when: openshift_prometheus_storage_type == 'pvc'
+
+- name: create alertmanager pvc
+  oc_pvc:
+    name: "{{ openshift_prometheus_alertmanager_pvc_name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    volume_capacity: "{{ openshift_prometheus_alertmanager_pvc_size }}"
+    access_modes: "{{ openshift_prometheus_alertmanager_pvc_access_modes }}"
+    selector: "{{ openshift_prometheus_alertmanager_pvc_pv_selector }}"
+  when: openshift_prometheus_alertmanager_storage_type == 'pvc'
+
+- name: create alertbuffer pvc
+  oc_pvc:
+    name: "{{ openshift_prometheus_alertbuffer_pvc_name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    volume_capacity: "{{ openshift_prometheus_alertbuffer_pvc_size }}"
+    access_modes: "{{ openshift_prometheus_alertbuffer_pvc_access_modes }}"
+    selector: "{{ openshift_prometheus_alertbuffer_pvc_pv_selector }}"
+  when: openshift_prometheus_alertbuffer_storage_type == 'pvc'
+
+# Render the StatefulSet manifest into the work directory, then apply it
+- name: Set prometheus template
+  template:
+    dest: "{{ tempdir }}/templates/prometheus.yaml"
+    src: prometheus.j2
+  vars:
+    namespace: "{{ openshift_prometheus_namespace }}"
+#    prom_replicas: "{{ openshift_prometheus_replicas }}"
+
+- name: Set prometheus stateful set
+  oc_obj:
+    kind: statefulset
+    name: "prometheus"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    state: "{{ state }}"
+    files:
+    - "{{ tempdir }}/templates/prometheus.yaml"
+    delete_after: true
+
+# prometheus configmap
+# Stage the optional additional rules file in the work directory; skipped when
+# the variable is undefined, null or blank.
+- name: Copy additional rules file to host
+  copy:
+    dest: "{{ tempdir }}/prometheus.additional.rules"
+    src: "{{ openshift_prometheus_additional_rules_file }}"
+  when:
+  - openshift_prometheus_additional_rules_file is defined
+  - openshift_prometheus_additional_rules_file is not none
+  - openshift_prometheus_additional_rules_file | trim | length > 0
+
+# Record whether the file was staged; the configmap tasks branch on this
+- stat:
+    path: "{{ tempdir }}/prometheus.additional.rules"
+  register: additional_rules_stat
+
+# The scrape endpoint depends on the kubernetes version, so look it up before
+# the prometheus config is rendered.
+# NOTE(review): storing the version as a float makes 1.10 compare as 1.1
+# (i.e. below 1.7) in the config template - confirm and consider a version test.
+- name: get oc version
+  oc_version:
+  register: oc_version
+
+- set_fact:
+    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}"
+
+# Render the prometheus config and the base rules file into the work
+# directory. These are scratch files, so the tasks never report "changed".
+- name: Generate prometheus config file
+  template:
+    src: prometheus.yml.j2
+    dest: "{{ tempdir }}/prometheus.yml"
+  changed_when: false
+
+- name: Generate prometheus rules file
+  template:
+    src: prometheus.rules.j2
+    dest: "{{ tempdir }}/prometheus.rules"
+  changed_when: false
+
+# Build the "prometheus" configmap. Exactly one of the two tasks runs: the
+# first adds a prometheus.additional.rules entry when that file was staged in
+# the work directory, the second omits it.
+- name: Set prometheus configmap (with additional rules)
+  oc_configmap:
+    state: "{{ state }}"
+    name: "prometheus"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    from_file:
+      prometheus.rules: "{{ tempdir }}/prometheus.rules"
+      prometheus.additional.rules: "{{ tempdir }}/prometheus.additional.rules"
+      prometheus.yml: "{{ tempdir }}/prometheus.yml"
+  # stat.exists is already a boolean; no comparison against True needed
+  when: additional_rules_stat.stat.exists
+
+- name: Set prometheus configmap (without additional rules)
+  oc_configmap:
+    state: "{{ state }}"
+    name: "prometheus"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    from_file:
+      prometheus.rules: "{{ tempdir }}/prometheus.rules"
+      prometheus.yml: "{{ tempdir }}/prometheus.yml"
+  when: not additional_rules_stat.stat.exists
+
+# alertmanager configmap
+# Render alertmanager.yml into the work directory (scratch file, never
+# reported as "changed"), then publish it as the "prometheus-alerts" configmap.
+- name: Generate alertmanager config file
+  template:
+    src: alertmanager.yml.j2
+    dest: "{{ tempdir }}/alertmanager.yml"
+  changed_when: false
+
+- name: Set alertmanager configmap
+  oc_configmap:
+    state: "{{ state }}"
+    name: "prometheus-alerts"
+    namespace: "{{ openshift_prometheus_namespace }}"
+    from_file:
+      alertmanager.yml: "{{ tempdir }}/alertmanager.yml"
diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml
new file mode 100644
index 000000000..523a64334
--- /dev/null
+++ b/roles/openshift_prometheus/tasks/main.yaml
@@ -0,0 +1,26 @@
+---
+# Role entry point: create a scratch directory on the target, run the install
+# tasks against it, then remove the directory again.
+
+- name: Create temp directory for doing work in on target
+  command: mktemp -td openshift-prometheus-ansible-XXXXXX
+  register: mktemp
+  changed_when: false
+
+- name: Record temp directory path
+  set_fact:
+    tempdir: "{{ mktemp.stdout }}"
+
+- name: Create templates subdirectory
+  file:
+    state: directory
+    path: "{{ tempdir }}/templates"
+    # Quoted so the octal mode is passed through as written rather than
+    # depending on how the YAML parser types a leading-zero number.
+    mode: "0755"
+  changed_when: false
+
+# The included tasks read `state` for every resource they manage.
+- include: install_prometheus.yaml
+  vars:
+    state: "{{ openshift_prometheus_state }}"
+
+- name: Delete temp directory
+  file:
+    name: "{{ tempdir }}"
+    state: absent
+  changed_when: false
diff --git a/roles/openshift_prometheus/templates/alertmanager.yml.j2 b/roles/openshift_prometheus/templates/alertmanager.yml.j2
new file mode 100644
index 000000000..6c432a3d0
--- /dev/null
+++ b/roles/openshift_prometheus/templates/alertmanager.yml.j2
@@ -0,0 +1,20 @@
+global:
+
+# Root route: the entry point for every incoming alert.
+route:
+  # Receiver used when no child route matches
+  receiver: alert-buffer-wh
+
+  # Labels used to group incoming alerts. For example, several alerts arriving
+  # for cluster=A and alertname=LatencyHigh would be batched into one group.
+  # TODO:
+  group_by: []
+
+  # All of the attributes above are inherited by child routes and can be
+  # overwritten on each of them.
+
+receivers:
+- name: alert-buffer-wh
+  webhook_configs:
+  - url: http://localhost:9099/topics/alerts
diff --git a/roles/openshift_prometheus/templates/prometheus.j2 b/roles/openshift_prometheus/templates/prometheus.j2
new file mode 100644
index 000000000..916c57aa2
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.j2
@@ -0,0 +1,247 @@
+# StatefulSet whose single pod template runs prometheus, alertmanager, the
+# alert buffer and the two oauth proxies.
+# The node selector loop uses items(), which exists on both Python 2 and 3
+# dictionaries (iteritems() is Python 2 only).
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: prometheus
+  namespace: "{{ namespace }}"
+  labels:
+    app: prometheus
+spec:
+  updateStrategy:
+    type: RollingUpdate
+  podManagementPolicy: Parallel
+  # Only matchLabels / matchExpressions are label-selector fields, so the
+  # stray `provider` key has been dropped.
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      name: prometheus
+      labels:
+        app: prometheus
+    spec:
+      serviceAccountName: prometheus
+{% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %}
+      nodeSelector:
+{% for key, value in openshift_prometheus_node_selector.items() %}
+        {{ key }}: "{{ value }}"
+{% endfor %}
+{% endif %}
+      containers:
+      # oauth proxy serving HTTPS on :8443 in front of prometheus (localhost:9090)
+      - name: prom-proxy
+        image: "{{ openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ openshift_prometheus_proxy_image_version }}"
+        imagePullPolicy: IfNotPresent
+        # Each request/limit entry is rendered only when its variable is set
+        resources:
+          requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+            memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+            cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}"
+{% endif %}
+          limits:
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+            memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+            cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
+{% endif %}
+        ports:
+        - name: web
+          containerPort: 8443
+        args:
+        - -provider=openshift
+        - -https-address=:8443
+        - -http-address=
+        - -email-domain=*
+        - -upstream=http://localhost:9090
+        - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+        - -tls-cert=/etc/tls/private/tls.crt
+        - -tls-key=/etc/tls/private/tls.key
+        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+        - -cookie-secret-file=/etc/proxy/secrets/session_secret
+        - -openshift-ca=/etc/pki/tls/cert.pem
+        - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        - -skip-auth-regex=^/metrics
+        volumeMounts:
+        - name: prometheus-tls
+          mountPath: /etc/tls/private
+        - name: prometheus-secrets
+          mountPath: /etc/proxy/secrets
+        - name: prometheus-data
+          mountPath: /prometheus
+
+      # prometheus itself; listens on localhost:9090 only
+      - name: prometheus
+        image: "{{ openshift_prometheus_image_prefix }}prometheus:{{ openshift_prometheus_image_version }}"
+        imagePullPolicy: IfNotPresent
+        args:
+        - --storage.tsdb.retention=6h
+        - --storage.tsdb.min-block-duration=2m
+        - --config.file=/etc/prometheus/prometheus.yml
+        - --web.listen-address=localhost:9090
+        # Each request/limit entry is rendered only when its variable is set
+        resources:
+          requests:
+{% if openshift_prometheus_memory_requests is defined and openshift_prometheus_memory_requests is not none %}
+            memory: "{{ openshift_prometheus_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_cpu_requests is defined and openshift_prometheus_cpu_requests is not none %}
+            cpu: "{{ openshift_prometheus_cpu_requests }}"
+{% endif %}
+          limits:
+{% if openshift_prometheus_memory_limit is defined and openshift_prometheus_memory_limit is not none %}
+            memory: "{{ openshift_prometheus_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_cpu_limit is defined and openshift_prometheus_cpu_limit is not none %}
+            cpu: "{{ openshift_prometheus_cpu_limit }}"
+{% endif %}
+
+        volumeMounts:
+        - name: prometheus-config
+          mountPath: /etc/prometheus
+        - name: prometheus-data
+          mountPath: /prometheus
+
+      # oauth proxy serving HTTPS on :9443 in front of the alert buffer
+      # (localhost:9099), which in turn receives alerts from alertmanager
+      - name: alerts-proxy
+        image: "{{ openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ openshift_prometheus_proxy_image_version }}"
+        imagePullPolicy: IfNotPresent
+        # Each request/limit entry is rendered only when its variable is set
+        resources:
+          requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+            memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+            cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}"
+{% endif %}
+          limits:
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+            memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+            cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
+{% endif %}
+        ports:
+        - name: web
+          containerPort: 9443
+        args:
+        - -provider=openshift
+        - -https-address=:9443
+        - -http-address=
+        - -email-domain=*
+        - -upstream=http://localhost:9099
+        - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+        - -tls-cert=/etc/tls/private/tls.crt
+        - -tls-key=/etc/tls/private/tls.key
+        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+        - -cookie-secret-file=/etc/proxy/secrets/session_secret
+        - -openshift-ca=/etc/pki/tls/cert.pem
+        - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        volumeMounts:
+        - name: alerts-tls
+          mountPath: /etc/tls/private
+        - name: alerts-secrets
+          mountPath: /etc/proxy/secrets
+
+      # alert buffer; stores messages under /alert-buffer and exposes port 9099
+      - name: alert-buffer
+        image: "{{ openshift_prometheus_alertbuffer_image_prefix }}prometheus-alert-buffer:{{ openshift_prometheus_alertbuffer_image_version }}"
+        imagePullPolicy: IfNotPresent
+        args:
+        - --storage-path=/alert-buffer/messages.db
+        # Each request/limit entry is rendered only when its variable is set
+        resources:
+          requests:
+{% if openshift_prometheus_alertbuffer_memory_requests is defined and openshift_prometheus_alertbuffer_memory_requests is not none %}
+            memory: "{{ openshift_prometheus_alertbuffer_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_alertbuffer_cpu_requests is defined and openshift_prometheus_alertbuffer_cpu_requests is not none %}
+            cpu: "{{ openshift_prometheus_alertbuffer_cpu_requests }}"
+{% endif %}
+          limits:
+{% if openshift_prometheus_alertbuffer_memory_limit is defined and openshift_prometheus_alertbuffer_memory_limit is not none %}
+            memory: "{{ openshift_prometheus_alertbuffer_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_alertbuffer_cpu_limit is defined and openshift_prometheus_alertbuffer_cpu_limit is not none %}
+            cpu: "{{ openshift_prometheus_alertbuffer_cpu_limit }}"
+{% endif %}
+        ports:
+        - name: alert-buf
+          containerPort: 9099
+        volumeMounts:
+        - name: alert-buffer-data
+          mountPath: /alert-buffer
+
+      # alertmanager; reads its config from the mounted configmap, port 9093
+      - name: alertmanager
+        image: "{{ openshift_prometheus_alertmanager_image_prefix }}prometheus-alertmanager:{{ openshift_prometheus_alertmanager_image_version }}"
+        imagePullPolicy: IfNotPresent
+        args:
+        - -config.file=/etc/alertmanager/alertmanager.yml
+        # Each request/limit entry is rendered only when its variable is set
+        resources:
+          requests:
+{% if openshift_prometheus_alertmanager_memory_requests is defined and openshift_prometheus_alertmanager_memory_requests is not none %}
+            memory: "{{ openshift_prometheus_alertmanager_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_alertmanager_cpu_requests is defined and openshift_prometheus_alertmanager_cpu_requests is not none %}
+            cpu: "{{ openshift_prometheus_alertmanager_cpu_requests }}"
+{% endif %}
+          limits:
+{% if openshift_prometheus_alertmanager_memory_limit is defined and openshift_prometheus_alertmanager_memory_limit is not none %}
+            memory: "{{ openshift_prometheus_alertmanager_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_alertmanager_cpu_limit is defined and openshift_prometheus_alertmanager_cpu_limit is not none %}
+            cpu: "{{ openshift_prometheus_alertmanager_cpu_limit }}"
+{% endif %}
+        ports:
+        - name: web
+          containerPort: 9093
+        volumeMounts:
+        - name: alertmanager-config
+          mountPath: /etc/alertmanager
+        - name: alertmanager-data
+          mountPath: /alertmanager
+
+      restartPolicy: Always
+      # Pod volumes. The three data volumes use a persistent volume claim when
+      # the component's storage type is 'pvc' and an emptyDir otherwise. The
+      # volume source key is spelled `emptyDir` (camelCase), and claim names
+      # are quoted so they always render as strings.
+      volumes:
+      - name: prometheus-config
+        configMap:
+          defaultMode: 420
+          name: prometheus
+      - name: prometheus-secrets
+        secret:
+          secretName: prometheus-proxy
+      - name: prometheus-tls
+        secret:
+          secretName: prometheus-tls
+      - name: prometheus-data
+{% if openshift_prometheus_storage_type == 'pvc' %}
+        persistentVolumeClaim:
+          claimName: "{{ openshift_prometheus_pvc_name }}"
+{% else %}
+        emptyDir: {}
+{% endif %}
+      - name: alertmanager-config
+        configMap:
+          defaultMode: 420
+          name: prometheus-alerts
+      - name: alerts-secrets
+        secret:
+          secretName: alerts-proxy
+      - name: alerts-tls
+        secret:
+          secretName: prometheus-alerts-tls
+      - name: alertmanager-data
+{% if openshift_prometheus_alertmanager_storage_type == 'pvc' %}
+        persistentVolumeClaim:
+          claimName: "{{ openshift_prometheus_alertmanager_pvc_name }}"
+{% else %}
+        emptyDir: {}
+{% endif %}
+      - name: alert-buffer-data
+{% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %}
+        persistentVolumeClaim:
+          claimName: "{{ openshift_prometheus_alertbuffer_pvc_name }}"
+{% else %}
+        emptyDir: {}
+{% endif %}
diff --git a/roles/openshift_prometheus/templates/prometheus.rules.j2 b/roles/openshift_prometheus/templates/prometheus.rules.j2
new file mode 100644
index 000000000..e861dc127
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.rules.j2
@@ -0,0 +1,4 @@
+# Base rules file: a single group that ships without any rules.
+groups:
+- name: example-rules
+  interval: 30s # defaults to global interval
+  # Explicit empty list instead of a bare key (which parses as null)
+  rules: []
diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2
new file mode 100644
index 000000000..63430f834
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.yml.j2
@@ -0,0 +1,174 @@
+# Rule files to load; the additional file is listed only when the role
+# variable for it is set.
+rule_files:
+- 'prometheus.rules'
+{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %}
+- 'prometheus.additional.rules'
+{% endif %}
+
+
+
+# Scrape configuration for running Prometheus on a Kubernetes cluster.
+# Cluster components (API server, node) and services each get their own scrape
+# config so that they can use different authentication settings.
+#
+# Kubernetes labels are added as Prometheus labels on metrics via the
+# `labelmap` relabeling action.
+
+# Scrape config for API servers.
+#
+# Kubernetes exposes API servers as endpoints of the default/kubernetes
+# service, so this job uses the `endpoints` role and relabelling to keep only
+# the endpoints of that service on the default named port `https`. This works
+# for single API server deployments as well as HA API server deployments.
+scrape_configs:
+- job_name: 'kubernetes-apiservers'
+
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+  kubernetes_sd_configs:
+  - role: endpoints
+
+  # Keep only the default/kubernetes service endpoints for the https port.
+  # This adds a target for each API server that Kubernetes registers as an
+  # endpoint of the default/kubernetes service.
+  relabel_configs:
+  - action: keep
+    source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+    regex: default;kubernetes;https
+
+# Scrape config for nodes.
+#
+# Every node serves a /metrics endpoint with operational metrics for the
+# Kubelet and other components.
+- job_name: 'kubernetes-nodes'
+
+  kubernetes_sd_configs:
+  - role: node
+
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+  relabel_configs:
+  - regex: __meta_kubernetes_node_label_(.+)
+    action: labelmap
+
+# Scrape config for controllers.
+#
+# Every master node serves a /metrics endpoint on :8444 with operational
+# metrics for the controllers.
+#
+# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
+# endpoints.
+- job_name: 'kubernetes-controllers'
+
+  kubernetes_sd_configs:
+  - role: endpoints
+
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+  # Keep only the default/kubernetes service endpoints for the https port, then
+  # rewrite the port to 8444, the default for the controllers on OpenShift
+  # masters.
+  relabel_configs:
+  - action: keep
+    source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+    regex: default;kubernetes;https
+  - action: replace
+    source_labels: [__address__]
+    target_label: __address__
+    regex: (.+)(?::\d+)
+    replacement: $1:8444
+
+# Scrape config for cAdvisor.
+#
+# From Kube 1.7 on, every node serves a /metrics/cadvisor endpoint reporting
+# container metrics for each running pod; older versions are scraped on
+# /metrics instead.
+# NOTE(review): the version is compared as a float, so 1.10 evaluates as 1.1
+# and would select /metrics - confirm and consider a version comparison.
+- job_name: 'kubernetes-cadvisor'
+
+  kubernetes_sd_configs:
+  - role: node
+
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+{% if kubernetes_version | float() >= 1.7 | float() %}
+  metrics_path: /metrics/cadvisor
+{% else %}
+  metrics_path: /metrics
+{% endif %}
+
+  relabel_configs:
+  - regex: __meta_kubernetes_node_label_(.+)
+    action: labelmap
+
+# Scrape config for service endpoints.
+#
+# Relabeling lets each service configure its own scrape endpoint through
+# these annotations:
+#
+# * `prometheus.io/scrape`: only services with a value of `true` are scraped
+# * `prometheus.io/scheme`: set to `https` if the metrics endpoint is secured;
+#   you will most likely also need to set the `tls_config` of the scrape config.
+# * `prometheus.io/path`: override if the metrics path is not `/metrics`.
+# * `prometheus.io/port`: set if the metrics are exposed on a different port
+#   than the service port.
+- job_name: 'kubernetes-service-endpoints'
+
+  kubernetes_sd_configs:
+  - role: endpoints
+
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+    # TODO: this should be per target
+    insecure_skip_verify: true
+
+  relabel_configs:
+  - action: keep
+    source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+    regex: true
+  - action: replace
+    source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+    target_label: __scheme__
+    regex: (https?)
+  - action: replace
+    source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+    target_label: __metrics_path__
+    regex: (.+)
+  - action: replace
+    source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+    target_label: __address__
+    regex: (.+)(?::\d+);(\d+)
+    replacement: $1:$2
+  - action: replace
+    source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
+    target_label: __basic_auth_username__
+    regex: (.+)
+  - action: replace
+    source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
+    target_label: __basic_auth_password__
+    regex: (.+)
+  - action: labelmap
+    regex: __meta_kubernetes_service_label_(.+)
+  - action: replace
+    source_labels: [__meta_kubernetes_namespace]
+    target_label: kubernetes_namespace
+  - action: replace
+    source_labels: [__meta_kubernetes_service_name]
+    target_label: kubernetes_name
+
+# Alerts are sent over plain HTTP to the alertmanager on localhost:9093
+alerting:
+  alertmanagers:
+  - static_configs:
+    - targets:
+      - "localhost:9093"
+    scheme: http
diff --git a/roles/openshift_prometheus/tests/inventory b/roles/openshift_prometheus/tests/inventory
new file mode 100644
index 000000000..878877b07
--- /dev/null
+++ b/roles/openshift_prometheus/tests/inventory
@@ -0,0 +1,2 @@
+localhost
+
diff --git a/roles/openshift_prometheus/tests/test.yaml b/roles/openshift_prometheus/tests/test.yaml
new file mode 100644
index 000000000..37baf573c
--- /dev/null
+++ b/roles/openshift_prometheus/tests/test.yaml
@@ -0,0 +1,5 @@
+---
+# Test playbook: applies the role to localhost as root.
+- hosts: localhost
+  remote_user: root
+  roles:
+  - role: openshift_prometheus