summaryrefslogtreecommitdiffstats
path: root/roles/openshift_prometheus
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_prometheus')
-rw-r--r--roles/openshift_prometheus/README.md35
-rw-r--r--roles/openshift_prometheus/defaults/main.yaml56
-rw-r--r--roles/openshift_prometheus/files/openshift_prometheus.exports3
-rw-r--r--roles/openshift_prometheus/tasks/create_pvs.yaml36
-rw-r--r--roles/openshift_prometheus/tasks/install_prometheus.yaml24
-rw-r--r--roles/openshift_prometheus/tasks/nfs.yaml44
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-server.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prometheus.j2 (renamed from roles/openshift_prometheus/templates/prometheus_deployment.j2)25
10 files changed, 75 insertions, 193 deletions
diff --git a/roles/openshift_prometheus/README.md b/roles/openshift_prometheus/README.md
index c5a44bffb..92f74928c 100644
--- a/roles/openshift_prometheus/README.md
+++ b/roles/openshift_prometheus/README.md
@@ -17,16 +17,16 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml).
- `openshift_prometheus_namespace`: project (i.e. namespace) where the components will be
deployed.
-- `openshift_prometheus_replicas`: The number of replicas for prometheus deployment.
-
- `openshift_prometheus_node_selector`: Selector for the nodes prometheus will be deployed on.
-- `openshift_prometheus_image_<COMPONENT>`: specify image for the component
+- `openshift_prometheus_<COMPONENT>_image_prefix`: specify image prefix for the component
+
+- `openshift_prometheus_<COMPONENT>_image_version`: specify image version for the component
-## Storage related variables
-Each prometheus component (prometheus, alertmanager, alert-buffer, oauth-proxy) can set pv claim by setting corresponding role variable:
+## PVC related variables
+Each prometheus component (prometheus, alertmanager, alertbuffer) can set pv claim by setting corresponding role variable:
```
-openshift_prometheus_<COMPONENT>_storage_type: <VALUE>
+openshift_prometheus_<COMPONENT>_storage_type: <VALUE> (pvc, emptydir)
openshift_prometheus_<COMPONENT>_pvc_(name|size|access_modes|pv_selector): <VALUE>
```
e.g
@@ -37,6 +37,29 @@ openshift_prometheus_alertbuffer_pvc_size: 10G
openshift_prometheus_pvc_access_modes: [ReadWriteOnce]
```
+## NFS PV Storage variables
+Each prometheus component (prometheus, alertmanager, alertbuffer) can set nfs pv by setting corresponding variable:
+```
+openshift_prometheus_<COMPONENT>_storage_kind=<VALUE>
+openshift_prometheus_<COMPONENT>_storage_(access_modes|host|labels)=<VALUE>
+openshift_prometheus_<COMPONENT>_storage_volume_(name|size)=<VALUE>
+openshift_prometheus_<COMPONENT>_storage_nfs_(directory|options)=<VALUE>
+```
+e.g
+```
+openshift_prometheus_storage_kind=nfs
+openshift_prometheus_storage_access_modes=['ReadWriteOnce']
+openshift_prometheus_storage_host=nfs.example.com #for external host
+openshift_prometheus_storage_nfs_directory=/exports
+openshift_prometheus_storage_alertmanager_nfs_options='*(rw,root_squash)'
+openshift_prometheus_storage_volume_name=prometheus
+openshift_prometheus_storage_alertbuffer_volume_size=10Gi
+openshift_prometheus_storage_labels={'storage': 'prometheus'}
+```
+
+NOTE: Setting `openshift_prometheus_<COMPONENT>_storage_labels` overrides `openshift_prometheus_<COMPONENT>_pvc_pv_selector`
+
+
## Additional Alert Rules file variable
An external file with alert rules can be added by setting path to additional rules variable:
```
diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml
index 5aa8aecec..00995eee6 100644
--- a/roles/openshift_prometheus/defaults/main.yaml
+++ b/roles/openshift_prometheus/defaults/main.yaml
@@ -4,56 +4,42 @@ openshift_prometheus_state: present
openshift_prometheus_namespace: prometheus
-openshift_prometheus_replicas: 1
openshift_prometheus_node_selector: {"region":"infra"}
-# images
-openshift_prometheus_image_proxy: "openshift/oauth-proxy:v1.0.0"
-openshift_prometheus_image_prometheus: "openshift/prometheus:v2.0.0-dev"
-openshift_prometheus_image_alertmanager: "openshift/prometheus-alertmanager:dev"
-openshift_prometheus_image_alertbuffer: "openshift/prometheus-alert-buffer:v0.0.1"
+# image defaults
+openshift_prometheus_image_prefix: "openshift/"
+openshift_prometheus_image_version: "v2.0.0-dev.3"
+openshift_prometheus_proxy_image_prefix: "openshift/"
+openshift_prometheus_proxy_image_version: "v1.0.0"
+openshift_prometheus_alertmanager_image_prefix: "openshift/"
+openshift_prometheus_alertmanager_image_version: "v0.9.1"
+openshift_prometheus_alertbuffer_image_prefix: "openshift/"
+openshift_prometheus_alertbuffer_image_version: "v0.0.2"
# additional prometheus rules file
openshift_prometheus_additional_rules_file: null
-# All the required exports
-openshift_prometheus_pv_exports:
- - prometheus
- - prometheus-alertmanager
- - prometheus-alertbuffer
-# PV template files and their created object names
-openshift_prometheus_pv_data:
- - pv_name: prometheus
- pv_template: prom-pv-server.yml
- pv_label: Prometheus Server PV
- - pv_name: prometheus-alertmanager
- pv_template: prom-pv-alertmanager.yml
- pv_label: Prometheus Alertmanager PV
- - pv_name: prometheus-alertbuffer
- pv_template: prom-pv-alertbuffer.yml
- pv_label: Prometheus Alert Buffer PV
-
-# Hostname/IP of the NFS server. Currently defaults to first master
-openshift_prometheus_nfs_server: "{{ groups.nfs.0 }}"
-
# storage
-openshift_prometheus_storage_type: pvc
+# One of ['emptydir', 'pvc']
+openshift_prometheus_storage_type: "emptydir"
openshift_prometheus_pvc_name: prometheus
-openshift_prometheus_pvc_size: 10G
+openshift_prometheus_pvc_size: "{{ openshift_prometheus_storage_volume_size | default('10Gi') }}"
openshift_prometheus_pvc_access_modes: [ReadWriteOnce]
-openshift_prometheus_pvc_pv_selector: {}
+openshift_prometheus_pvc_pv_selector: "{{ openshift_prometheus_storage_labels | default({}) }}"
-openshift_prometheus_alertmanager_storage_type: pvc
+# One of ['emptydir', 'pvc']
+openshift_prometheus_alertmanager_storage_type: "emptydir"
openshift_prometheus_alertmanager_pvc_name: prometheus-alertmanager
-openshift_prometheus_alertmanager_pvc_size: 10G
+openshift_prometheus_alertmanager_pvc_size: "{{ openshift_prometheus_alertmanager_storage_volume_size | default('10Gi') }}"
openshift_prometheus_alertmanager_pvc_access_modes: [ReadWriteOnce]
-openshift_prometheus_alertmanager_pvc_pv_selector: {}
+openshift_prometheus_alertmanager_pvc_pv_selector: "{{ openshift_prometheus_alertmanager_storage_labels | default({}) }}"
-openshift_prometheus_alertbuffer_storage_type: pvc
+# One of ['emptydir', 'pvc']
+openshift_prometheus_alertbuffer_storage_type: "emptydir"
openshift_prometheus_alertbuffer_pvc_name: prometheus-alertbuffer
-openshift_prometheus_alertbuffer_pvc_size: 10G
+openshift_prometheus_alertbuffer_pvc_size: "{{ openshift_prometheus_alertbuffer_storage_volume_size | default('10Gi') }}"
openshift_prometheus_alertbuffer_pvc_access_modes: [ReadWriteOnce]
-openshift_prometheus_alertbuffer_pvc_pv_selector: {}
+openshift_prometheus_alertbuffer_pvc_pv_selector: "{{ openshift_prometheus_alertbuffer_storage_labels | default({}) }}"
# container resources
openshift_prometheus_cpu_limit: null
diff --git a/roles/openshift_prometheus/files/openshift_prometheus.exports b/roles/openshift_prometheus/files/openshift_prometheus.exports
deleted file mode 100644
index 3ccedb1fd..000000000
--- a/roles/openshift_prometheus/files/openshift_prometheus.exports
+++ /dev/null
@@ -1,3 +0,0 @@
-/exports/prometheus *(rw,no_root_squash,no_wdelay)
-/exports/prometheus-alertmanager *(rw,no_root_squash,no_wdelay)
-/exports/prometheus-alertbuffer *(rw,no_root_squash,no_wdelay)
diff --git a/roles/openshift_prometheus/tasks/create_pvs.yaml b/roles/openshift_prometheus/tasks/create_pvs.yaml
deleted file mode 100644
index 4e79da05f..000000000
--- a/roles/openshift_prometheus/tasks/create_pvs.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
----
-# Check for existance and then conditionally:
-# - evaluate templates
-# - PVs
-#
-# These tasks idempotently create required Prometheus PV objects. Do not
-# call this file directly. This file is intended to be ran as an
-# include that has a 'with_items' attached to it. Hence the use below
-# of variables like "{{ item.pv_label }}"
-
-- name: "Check if the {{ item.pv_label }} template has been created already"
- oc_obj:
- namespace: "{{ openshift_prometheus_namespace }}"
- state: list
- kind: pv
- name: "{{ item.pv_name }}"
- register: prom_pv_check
-
-# Skip all of this if the PV already exists
-- block:
- - name: "Ensure the {{ item.pv_label }} template is evaluated"
- template:
- src: "{{ item.pv_template }}.j2"
- dest: "{{ tempdir }}/templates/{{ item.pv_template }}"
-
- - name: "Ensure {{ item.pv_label }} is created"
- oc_obj:
- namespace: "{{ openshift_prometheus_namespace }}"
- kind: pv
- name: "{{ item.pv_name }}"
- state: present
- delete_after: True
- files:
- - "{{ tempdir }}/templates/{{ item.pv_template }}"
- when:
- - not prom_pv_check.results.results.0
diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml
index a9bce2fb1..00c3c1987 100644
--- a/roles/openshift_prometheus/tasks/install_prometheus.yaml
+++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml
@@ -54,15 +54,6 @@
resource_name: cluster-reader
user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus"
-
-######################################################################
-# NFS
-# In the case that we are not running on a cloud provider, volumes must be statically provisioned
-
-- include: nfs.yaml
- when: not (openshift_cloudprovider_kind is defined and (openshift_cloudprovider_kind == 'aws' or openshift_cloudprovider_kind == 'gce'))
-
-
# create prometheus and alerts services
# TODO join into 1 task with loop
- name: Create prometheus service
@@ -137,6 +128,7 @@
access_modes: "{{ openshift_prometheus_pvc_access_modes }}"
volume_capacity: "{{ openshift_prometheus_pvc_size }}"
selector: "{{ openshift_prometheus_pvc_pv_selector }}"
+ when: openshift_prometheus_storage_type == 'pvc'
- name: create alertmanager pvc
oc_pvc:
@@ -145,6 +137,7 @@
access_modes: "{{ openshift_prometheus_alertmanager_pvc_access_modes }}"
volume_capacity: "{{ openshift_prometheus_alertmanager_pvc_size }}"
selector: "{{ openshift_prometheus_alertmanager_pvc_pv_selector }}"
+ when: openshift_prometheus_alertmanager_storage_type == 'pvc'
- name: create alertbuffer pvc
oc_pvc:
@@ -153,22 +146,23 @@
access_modes: "{{ openshift_prometheus_alertbuffer_pvc_access_modes }}"
volume_capacity: "{{ openshift_prometheus_alertbuffer_pvc_size }}"
selector: "{{ openshift_prometheus_alertbuffer_pvc_pv_selector }}"
+ when: openshift_prometheus_alertbuffer_storage_type == 'pvc'
-# create prometheus deployment
-- name: Set prometheus deployment template
+# create prometheus stateful set
+- name: Set prometheus template
template:
- src: prometheus_deployment.j2
+ src: prometheus.j2
dest: "{{ tempdir }}/templates/prometheus.yaml"
vars:
namespace: "{{ openshift_prometheus_namespace }}"
- prom_replicas: "{{ openshift_prometheus_replicas }}"
+# prom_replicas: "{{ openshift_prometheus_replicas }}"
-- name: Set prometheus deployment
+- name: Set prometheus stateful set
oc_obj:
state: "{{ state }}"
name: "prometheus"
namespace: "{{ openshift_prometheus_namespace }}"
- kind: deployment
+ kind: statefulset
files:
- "{{ tempdir }}/templates/prometheus.yaml"
delete_after: true
diff --git a/roles/openshift_prometheus/tasks/nfs.yaml b/roles/openshift_prometheus/tasks/nfs.yaml
deleted file mode 100644
index 0b45f2cee..000000000
--- a/roles/openshift_prometheus/tasks/nfs.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
----
-# Tasks to statically provision NFS volumes
-# Include if not using dynamic volume provisioning
-- name: Ensure the /exports/ directory exists
- file:
- path: /exports/
- state: directory
- mode: 0755
- owner: root
- group: root
-
-- name: Ensure the prom-pv0X export directories exist
- file:
- path: "/exports/{{ item }}"
- state: directory
- mode: 0777
- owner: nfsnobody
- group: nfsnobody
- with_items: "{{ openshift_prometheus_pv_exports }}"
-
-- name: Ensure the NFS exports for Prometheus PVs exist
- copy:
- src: openshift_prometheus.exports
- dest: /etc/exports.d/openshift_prometheus.exports
- register: nfs_exports_updated
-
-- name: Ensure the NFS export table is refreshed if exports were added
- command: exportfs -ar
- when:
- - nfs_exports_updated.changed
-
-
-######################################################################
-# Create the required Prometheus PVs. Check out these online docs if you
-# need a refresher on includes looping with items:
-# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0
-# * http://stackoverflow.com/a/35128533
-#
-# TODO: Handle the case where a PV template is updated in
-# openshift-ansible and the change needs to be landed on the managed
-# cluster.
-
-- include: create_pvs.yaml
- with_items: "{{ openshift_prometheus_pv_data }}"
diff --git a/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2
deleted file mode 100644
index 55a5e19c3..000000000
--- a/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: prometheus-alertbuffer
- labels:
- storage: prometheus-alertbuffer
-spec:
- capacity:
- storage: 15Gi
- accessModes:
- - ReadWriteOnce
- nfs:
- path: /exports/prometheus-alertbuffer
- server: {{ openshift_prometheus_nfs_server }}
- persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2
deleted file mode 100644
index 4ee518735..000000000
--- a/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: prometheus-alertmanager
- labels:
- storage: prometheus-alertmanager
-spec:
- capacity:
- storage: 15Gi
- accessModes:
- - ReadWriteOnce
- nfs:
- path: /exports/prometheus-alertmanager
- server: {{ openshift_prometheus_nfs_server }}
- persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prom-pv-server.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2
deleted file mode 100644
index 933bf0f60..000000000
--- a/roles/openshift_prometheus/templates/prom-pv-server.yml.j2
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: prometheus
- labels:
- storage: prometheus
-spec:
- capacity:
- storage: 15Gi
- accessModes:
- - ReadWriteOnce
- nfs:
- path: /exports/prometheus
- server: {{ openshift_prometheus_nfs_server }}
- persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prometheus_deployment.j2 b/roles/openshift_prometheus/templates/prometheus.j2
index 98c117f19..916c57aa2 100644
--- a/roles/openshift_prometheus/templates/prometheus_deployment.j2
+++ b/roles/openshift_prometheus/templates/prometheus.j2
@@ -1,12 +1,14 @@
-apiVersion: extensions/v1beta1
-kind: Deployment
+apiVersion: apps/v1beta1
+kind: StatefulSet
metadata:
name: prometheus
namespace: {{ namespace }}
labels:
app: prometheus
spec:
- replicas: {{ prom_replicas|default(1) }}
+ updateStrategy:
+ type: RollingUpdate
+ podManagementPolicy: Parallel
selector:
provider: openshift
matchLabels:
@@ -27,7 +29,7 @@ spec:
containers:
# Deploy Prometheus behind an oauth proxy
- name: prom-proxy
- image: "{{ openshift_prometheus_image_proxy }}"
+ image: "{{openshift_prometheus_proxy_image_prefix}}oauth-proxy:{{openshift_prometheus_proxy_image_version}}"
imagePullPolicy: IfNotPresent
resources:
requests:
@@ -38,7 +40,7 @@ spec:
cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}"
{% endif %}
limits:
-{% if openshift_prometheus_memory_requests_limit_proxy is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}"
{% endif %}
{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
@@ -60,6 +62,8 @@ spec:
- -tls-key=/etc/tls/private/tls.key
- -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
- -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -openshift-ca=/etc/pki/tls/cert.pem
+ - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -skip-auth-regex=^/metrics
volumeMounts:
- mountPath: /etc/tls/private
@@ -72,9 +76,10 @@ spec:
- name: prometheus
args:
- --storage.tsdb.retention=6h
+ - --storage.tsdb.min-block-duration=2m
- --config.file=/etc/prometheus/prometheus.yml
- --web.listen-address=localhost:9090
- image: "{{ openshift_prometheus_image_prometheus }}"
+ image: "{{openshift_prometheus_image_prefix}}prometheus:{{openshift_prometheus_image_version}}"
imagePullPolicy: IfNotPresent
resources:
requests:
@@ -100,7 +105,7 @@ spec:
# Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
- name: alerts-proxy
- image: "{{ openshift_prometheus_image_proxy }}"
+ image: "{{openshift_prometheus_proxy_image_prefix}}oauth-proxy:{{openshift_prometheus_proxy_image_version}}"
imagePullPolicy: IfNotPresent
resources:
requests:
@@ -133,6 +138,8 @@ spec:
- -tls-key=/etc/tls/private/tls.key
- -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
- -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -openshift-ca=/etc/pki/tls/cert.pem
+ - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
volumeMounts:
- mountPath: /etc/tls/private
name: alerts-tls
@@ -142,7 +149,7 @@ spec:
- name: alert-buffer
args:
- --storage-path=/alert-buffer/messages.db
- image: "{{ openshift_prometheus_image_alertbuffer }}"
+ image: "{{openshift_prometheus_alertbuffer_image_prefix}}prometheus-alert-buffer:{{openshift_prometheus_alertbuffer_image_version}}"
imagePullPolicy: IfNotPresent
resources:
requests:
@@ -169,7 +176,7 @@ spec:
- name: alertmanager
args:
- -config.file=/etc/alertmanager/alertmanager.yml
- image: "{{ openshift_prometheus_image_alertmanager }}"
+ image: "{{openshift_prometheus_alertmanager_image_prefix}}prometheus-alertmanager:{{openshift_prometheus_alertmanager_image_version}}"
imagePullPolicy: IfNotPresent
resources:
requests: