summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/ansible_service_broker/tasks/install.yml8
-rw-r--r--roles/calico_master/tasks/main.yml2
-rw-r--r--roles/openshift_ca/tasks/main.yml3
-rw-r--r--roles/openshift_expand_partition/tasks/main.yml2
-rw-r--r--roles/openshift_grafana/defaults/main.yml12
-rw-r--r--roles/openshift_grafana/files/grafana-ocp-oauth.yml661
-rw-r--r--roles/openshift_grafana/files/grafana-ocp.yml76
-rw-r--r--roles/openshift_grafana/files/openshift-cluster-monitoring.json5138
-rw-r--r--roles/openshift_grafana/meta/main.yml13
-rw-r--r--roles/openshift_grafana/tasks/gf-permissions.yml12
-rw-r--r--roles/openshift_grafana/tasks/main.yml122
-rw-r--r--roles/openshift_health_checker/openshift_checks/__init__.py43
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/kibana.py13
-rw-r--r--roles/openshift_health_checker/openshift_checks/ovs_version.py27
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_version.py58
-rw-r--r--roles/openshift_health_checker/test/kibana_test.py12
-rw-r--r--roles/openshift_health_checker/test/ovs_version_test.py23
-rw-r--r--roles/openshift_health_checker/test/package_version_test.py5
-rw-r--r--roles/openshift_logging/tasks/install_logging.yaml8
-rw-r--r--roles/openshift_logging_elasticsearch/tasks/get_es_version.yml4
-rw-r--r--roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml4
-rw-r--r--roles/openshift_metrics/tasks/oc_apply.yaml8
-rw-r--r--roles/openshift_node/defaults/main.yml6
-rw-r--r--roles/openshift_persistent_volumes/tasks/pv.yml2
-rw-r--r--roles/openshift_persistent_volumes/tasks/pvc.yml2
-rw-r--r--roles/openshift_provisioners/tasks/oc_apply.yaml12
-rw-r--r--roles/openshift_web_console/vars/default_images.yml4
-rw-r--r--roles/openshift_web_console/vars/openshift-enterprise.yml4
-rw-r--r--roles/template_service_broker/vars/default_images.yml4
-rw-r--r--roles/template_service_broker/vars/openshift-enterprise.yml4
32 files changed, 6147 insertions, 149 deletions
diff --git a/roles/ansible_service_broker/tasks/install.yml b/roles/ansible_service_broker/tasks/install.yml
index 1bc1b5e43..f869b5fae 100644
--- a/roles/ansible_service_broker/tasks/install.yml
+++ b/roles/ansible_service_broker/tasks/install.yml
@@ -375,6 +375,11 @@
secret:
secretName: etcd-auth-secret
+- name: set auth name and type facts if needed
+ set_fact:
+ ansible_service_broker_registry_auth_type: "secret"
+ ansible_service_broker_registry_auth_name: "asb-registry-auth"
+ when: ansible_service_broker_registry_user != "" and ansible_service_broker_registry_password != ""
# TODO: saw a oc_configmap in the library, but didn't understand how to get it to do the following:
- name: Create config map for ansible-service-broker
@@ -402,6 +407,8 @@
org: {{ ansible_service_broker_registry_organization }}
tag: {{ ansible_service_broker_registry_tag }}
white_list: {{ ansible_service_broker_registry_whitelist | to_yaml }}
+ auth_type: "{{ ansible_service_broker_registry_auth_type | default("") }}"
+ auth_name: "{{ ansible_service_broker_registry_auth_name | default("") }}"
- type: local_openshift
name: localregistry
namespaces: ['openshift']
@@ -447,6 +454,7 @@
data: "{{ ansible_service_broker_registry_user }}"
- path: password
data: "{{ ansible_service_broker_registry_password }}"
+ when: ansible_service_broker_registry_user != "" and ansible_service_broker_registry_password != ""
- name: Create the Broker resource in the catalog
oc_obj:
diff --git a/roles/calico_master/tasks/main.yml b/roles/calico_master/tasks/main.yml
index 05415a4d6..834ebba64 100644
--- a/roles/calico_master/tasks/main.yml
+++ b/roles/calico_master/tasks/main.yml
@@ -23,7 +23,7 @@
-f {{ mktemp.stdout }}/calico-policy-controller.yml
--config={{ openshift.common.config_base }}/master/admin.kubeconfig
register: calico_create_output
- failed_when: ('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout)
+ failed_when: "('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout) and calico_create_output.rc != 0"
changed_when: ('created' in calico_create_output.stdout)
- name: Calico Master | Delete temp directory
diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml
index b94cd9fba..9c8534c74 100644
--- a/roles/openshift_ca/tasks/main.yml
+++ b/roles/openshift_ca/tasks/main.yml
@@ -19,7 +19,8 @@
- name: Reload generated facts
openshift_facts:
- when: hostvars[openshift_ca_host].install_result is changed
+ when:
+ - hostvars[openshift_ca_host].install_result | default({'changed':false}) is changed
- name: Create openshift_ca_config_dir if it does not exist
file:
diff --git a/roles/openshift_expand_partition/tasks/main.yml b/roles/openshift_expand_partition/tasks/main.yml
index 5ae863871..b38ebdfb4 100644
--- a/roles/openshift_expand_partition/tasks/main.yml
+++ b/roles/openshift_expand_partition/tasks/main.yml
@@ -8,7 +8,7 @@
- name: Determine if growpart is installed
command: "rpm -q cloud-utils-growpart"
register: has_growpart
- failed_when: has_growpart.cr != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout
+ failed_when: has_growpart.rc != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout
changed_when: false
when: openshift_is_containerized | bool
diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml
new file mode 100644
index 000000000..7fd7a085d
--- /dev/null
+++ b/roles/openshift_grafana/defaults/main.yml
@@ -0,0 +1,12 @@
+---
+gf_body_tmp:
+ name: grafana_name
+ type: prometheus
+ typeLogoUrl: ''
+ access: proxy
+ url: prometheus_url
+ basicAuth: false
+ withCredentials: false
+ jsonData:
+ tlsSkipVerify: true
+ token: satoken
diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml
new file mode 100644
index 000000000..82fa89004
--- /dev/null
+++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml
@@ -0,0 +1,661 @@
+---
+kind: Template
+apiVersion: v1
+metadata:
+ name: grafana-ocp
+ annotations:
+ "openshift.io/display-name": Grafana ocp
+ description: |
+ Grafana server with patched Prometheus datasource.
+ iconClass: icon-cogs
+ tags: "metrics,monitoring,grafana,prometheus"
+parameters:
+- description: The location of the proxy image
+ name: IMAGE_GF
+ value: mrsiano/grafana-ocp:latest
+- description: The location of the proxy image
+ name: IMAGE_PROXY
+ value: openshift/oauth-proxy:v1.0.0
+- description: External URL for the grafana route
+ name: ROUTE_URL
+ value: ""
+- description: The namespace to instantiate heapster under. Defaults to 'grafana'.
+ name: NAMESPACE
+ value: grafana
+- description: The session secret for the proxy
+ name: SESSION_SECRET
+ generate: expression
+ from: "[a-zA-Z0-9]{43}"
+objects:
+- apiVersion: v1
+ kind: ServiceAccount
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ annotations:
+ serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}'
+- apiVersion: authorization.openshift.io/v1
+ kind: ClusterRoleBinding
+ metadata:
+ name: gf-cluster-reader
+ roleRef:
+ name: cluster-reader
+ subjects:
+ - kind: ServiceAccount
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+- apiVersion: route.openshift.io/v1
+ kind: Route
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ host: "${ROUTE_URL}"
+ to:
+ name: grafana-ocp
+ tls:
+ termination: Reencrypt
+- apiVersion: v1
+ kind: Service
+ metadata:
+ name: grafana-ocp
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/scheme: https
+ service.alpha.openshift.io/serving-cert-secret-name: gf-tls
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ ports:
+ - name: grafana-ocp
+ port: 443
+ protocol: TCP
+ targetPort: 8443
+ selector:
+ app: grafana-ocp
+- apiVersion: v1
+ kind: Secret
+ metadata:
+ name: gf-proxy
+ namespace: "${NAMESPACE}"
+ stringData:
+ session_secret: "${SESSION_SECRET}="
+# Deploy Prometheus behind an oauth proxy
+- apiVersion: extensions/v1beta1
+ kind: Deployment
+ metadata:
+ labels:
+ app: grafana-ocp
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: grafana-ocp
+ template:
+ metadata:
+ labels:
+ app: grafana-ocp
+ name: grafana-ocp-app
+ spec:
+ serviceAccountName: grafana-ocp
+ containers:
+ - name: oauth-proxy
+ image: ${IMAGE_PROXY}
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 8443
+ name: web
+ args:
+ - -https-address=:8443
+ - -http-address=
+ - -email-domain=*
+ - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp
+ - -upstream=http://localhost:3000
+ - -provider=openshift
+# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}'
+ - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: gf-tls
+ - mountPath: /etc/proxy/secrets
+ name: secrets
+
+ - name: grafana-ocp
+ image: ${IMAGE_GF}
+ ports:
+ - name: grafana-http
+ containerPort: 3000
+ volumeMounts:
+ - mountPath: "/root/go/src/github.com/grafana/grafana/data"
+ name: gf-data
+ - mountPath: "/root/go/src/github.com/grafana/grafana/conf"
+ name: gfconfig
+ - mountPath: /etc/tls/private
+ name: gf-tls
+ - mountPath: /etc/proxy/secrets
+ name: secrets
+ command:
+ - "./bin/grafana-server"
+
+ volumes:
+ - name: gfconfig
+ configMap:
+ name: gf-config
+ - name: secrets
+ secret:
+ secretName: gf-proxy
+ - name: gf-tls
+ secret:
+ secretName: gf-tls
+ - emptyDir: {}
+ name: gf-data
+- apiVersion: v1
+ kind: ConfigMap
+ metadata:
+ name: gf-config
+ namespace: "${NAMESPACE}"
+ data:
+ defaults.ini: |-
+ ##################### Grafana Configuration Defaults #####################
+ #
+ # Do not modify this file in grafana installs
+ #
+
+ # possible values : production, development
+ app_mode = production
+
+ # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
+ instance_name = ${HOSTNAME}
+
+ #################################### Paths ###############################
+ [paths]
+ # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
+ #
+ data = data
+ #
+ # Directory where grafana can store logs
+ #
+ logs = data/log
+ #
+ # Directory where grafana will automatically scan and look for plugins
+ #
+ plugins = data/plugins
+
+ #################################### Server ##############################
+ [server]
+ # Protocol (http, https, socket)
+ protocol = http
+
+ # The ip address to bind to, empty will bind to all interfaces
+ http_addr =
+
+ # The http port to use
+ http_port = 3000
+
+ # The public facing domain name used to access grafana from a browser
+ domain = localhost
+
+ # Redirect to correct domain if host header does not match domain
+ # Prevents DNS rebinding attacks
+ enforce_domain = false
+
+ # The full public facing url
+ root_url = %(protocol)s://%(domain)s:%(http_port)s/
+
+ # Log web requests
+ router_logging = false
+
+ # the path relative working path
+ static_root_path = public
+
+ # enable gzip
+ enable_gzip = false
+
+ # https certs & key file
+ cert_file = /etc/tls/private/tls.crt
+ cert_key = /etc/tls/private/tls.key
+
+ # Unix socket path
+ socket = /tmp/grafana.sock
+
+ #################################### Database ############################
+ [database]
+ # You can configure the database connection by specifying type, host, name, user and password
+ # as separate properties or as on string using the url property.
+
+ # Either "mysql", "postgres" or "sqlite3", it's your choice
+ type = sqlite3
+ host = 127.0.0.1:3306
+ name = grafana
+ user = root
+ # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
+ password =
+ # Use either URL or the previous fields to configure the database
+ # Example: mysql://user:secret@host:port/database
+ url =
+
+ # Max idle conn setting default is 2
+ max_idle_conn = 2
+
+ # Max conn setting default is 0 (mean not set)
+ max_open_conn =
+
+ # For "postgres", use either "disable", "require" or "verify-full"
+ # For "mysql", use either "true", "false", or "skip-verify".
+ ssl_mode = disable
+
+ ca_cert_path =
+ client_key_path =
+ client_cert_path =
+ server_cert_name =
+
+ # For "sqlite3" only, path relative to data_path setting
+ path = grafana.db
+
+ #################################### Session #############################
+ [session]
+ # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file"
+ provider = file
+
+ # Provider config options
+ # memory: not have any config yet
+ # file: session dir path, is relative to grafana data_path
+ # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana`
+ # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable
+ # mysql: go-sql-driver/mysql dsn config string, examples:
+ # `user:password@tcp(127.0.0.1:3306)/database_name`
+ # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name`
+ # memcache: 127.0.0.1:11211
+
+
+ provider_config = sessions
+
+ # Session cookie name
+ cookie_name = grafana_sess
+
+ # If you use session in https only, default is false
+ cookie_secure = false
+
+ # Session life time, default is 86400
+ session_life_time = 86400
+ gc_interval_time = 86400
+
+ #################################### Data proxy ###########################
+ [dataproxy]
+
+ # This enables data proxy logging, default is false
+ logging = false
+
+ #################################### Analytics ###########################
+ [analytics]
+ # Server reporting, sends usage counters to stats.grafana.org every 24 hours.
+ # No ip addresses are being tracked, only simple counters to track
+ # running instances, dashboard and error counts. It is very helpful to us.
+ # Change this option to false to disable reporting.
+ reporting_enabled = true
+
+ # Set to false to disable all checks to https://grafana.com
+ # for new versions (grafana itself and plugins), check is used
+ # in some UI views to notify that grafana or plugin update exists
+ # This option does not cause any auto updates, nor send any information
+ # only a GET request to https://grafana.com to get latest versions
+ check_for_updates = true
+
+ # Google Analytics universal tracking code, only enabled if you specify an id here
+ google_analytics_ua_id =
+
+ # Google Tag Manager ID, only enabled if you specify an id here
+ google_tag_manager_id =
+
+ #################################### Security ############################
+ [security]
+ # default admin user, created on startup
+ admin_user = admin
+
+ # default admin password, can be changed before first start of grafana, or in profile settings
+ admin_password = admin
+
+ # used for signing
+ secret_key = SW2YcwTIb9zpOOhoPsMm
+
+ # Auto-login remember days
+ login_remember_days = 7
+ cookie_username = grafana_user
+ cookie_remember_name = grafana_remember
+
+ # disable gravatar profile images
+ disable_gravatar = false
+
+ # data source proxy whitelist (ip_or_domain:port separated by spaces)
+ data_source_proxy_whitelist =
+
+ [snapshots]
+ # snapshot sharing options
+ external_enabled = true
+ external_snapshot_url = https://snapshots-origin.raintank.io
+ external_snapshot_name = Publish to snapshot.raintank.io
+
+ # remove expired snapshot
+ snapshot_remove_expired = true
+
+ # remove snapshots after 90 days
+ snapshot_TTL_days = 90
+
+ #################################### Users ####################################
+ [users]
+ # disable user signup / registration
+ allow_sign_up = true
+
+ # Allow non admin users to create organizations
+ allow_org_create = true
+
+ # Set to true to automatically assign new users to the default organization (id 1)
+ auto_assign_org = true
+
+ # Default role new users will be automatically assigned (if auto_assign_org above is set to true)
+ auto_assign_org_role = Admin
+
+ # Require email validation before sign up completes
+ verify_email_enabled = false
+
+ # Background text for the user field on the login page
+ login_hint = email or username
+
+ # Default UI theme ("dark" or "light")
+ default_theme = dark
+
+ # External user management
+ external_manage_link_url =
+ external_manage_link_name =
+ external_manage_info =
+
+ [auth]
+ # Set to true to disable (hide) the login form, useful if you use OAuth
+ disable_login_form = true
+
+ # Set to true to disable the signout link in the side menu. useful if you use auth.proxy
+ disable_signout_menu = true
+
+ #################################### Anonymous Auth ######################
+ [auth.anonymous]
+ # enable anonymous access
+ enabled = true
+
+ # specify organization name that should be used for unauthenticated users
+ org_name = Main Org.
+
+ # specify role for unauthenticated users
+ org_role = Admin
+
+ #################################### Github Auth #########################
+ [auth.github]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ auth_url = https://github.com/login/oauth/authorize
+ token_url = https://github.com/login/oauth/access_token
+ api_url = https://api.github.com/user
+ team_ids =
+ allowed_organizations =
+
+ #################################### Google Auth #########################
+ [auth.google]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_client_id
+ client_secret = some_client_secret
+ scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email
+ auth_url = https://accounts.google.com/o/oauth2/auth
+ token_url = https://accounts.google.com/o/oauth2/token
+ api_url = https://www.googleapis.com/oauth2/v1/userinfo
+ allowed_domains =
+ hosted_domain =
+
+ #################################### Grafana.com Auth ####################
+ # legacy key names (so they work in env variables)
+ [auth.grafananet]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ allowed_organizations =
+
+ [auth.grafana_com]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ allowed_organizations =
+
+ #################################### Generic OAuth #######################
+ [auth.generic_oauth]
+ name = OAuth
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ auth_url =
+ token_url =
+ api_url =
+ team_ids =
+ allowed_organizations =
+
+ #################################### Basic Auth ##########################
+ [auth.basic]
+ enabled = false
+
+ #################################### Auth Proxy ##########################
+ [auth.proxy]
+ enabled = true
+ header_name = X-WEBAUTH-USER
+ header_property = username
+ auto_sign_up = true
+ ldap_sync_ttl = 60
+ whitelist =
+
+ #################################### Auth LDAP ###########################
+ [auth.ldap]
+ enabled = false
+ config_file = /etc/grafana/ldap.toml
+ allow_sign_up = true
+
+ #################################### SMTP / Emailing #####################
+ [smtp]
+ enabled = false
+ host = localhost:25
+ user =
+ # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;"""
+ password =
+ cert_file =
+ key_file =
+ skip_verify = false
+ from_address = admin@grafana.localhost
+ from_name = Grafana
+ ehlo_identity =
+
+ [emails]
+ welcome_email_on_sign_up = false
+ templates_pattern = emails/*.html
+
+ #################################### Logging ##########################
+ [log]
+ # Either "console", "file", "syslog". Default is console and file
+ # Use space to separate multiple modes, e.g. "console file"
+ mode = console file
+
+ # Either "debug", "info", "warn", "error", "critical", default is "info"
+ level = error
+
+ # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug
+ filters =
+
+ # For "console" mode only
+ [log.console]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = console
+
+ # For "file" mode only
+ [log.file]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = text
+
+ # This enables automated log rotate(switch of following options), default is true
+ log_rotate = true
+
+ # Max line number of single file, default is 1000000
+ max_lines = 1000000
+
+ # Max size shift of single file, default is 28 means 1 << 28, 256MB
+ max_size_shift = 28
+
+ # Segment log daily, default is true
+ daily_rotate = true
+
+ # Expired days of log file(delete after max days), default is 7
+ max_days = 7
+
+ [log.syslog]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = text
+
+ # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used.
+ network =
+ address =
+
+ # Syslog facility. user, daemon and local0 through local7 are valid.
+ facility =
+
+ # Syslog tag. By default, the process' argv[0] is used.
+ tag =
+
+
+ #################################### AMQP Event Publisher ################
+ [event_publisher]
+ enabled = false
+ rabbitmq_url = amqp://localhost/
+ exchange = grafana_events
+
+ #################################### Dashboard JSON files ################
+ [dashboards.json]
+ enabled = false
+ path = /var/lib/grafana/dashboards
+
+ #################################### Usage Quotas ########################
+ [quota]
+ enabled = false
+
+ #### set quotas to -1 to make unlimited. ####
+ # limit number of users per Org.
+ org_user = 10
+
+ # limit number of dashboards per Org.
+ org_dashboard = 100
+
+ # limit number of data_sources per Org.
+ org_data_source = 10
+
+ # limit number of api_keys per Org.
+ org_api_key = 10
+
+ # limit number of orgs a user can create.
+ user_org = 10
+
+ # Global limit of users.
+ global_user = -1
+
+ # global limit of orgs.
+ global_org = -1
+
+ # global limit of dashboards
+ global_dashboard = -1
+
+ # global limit of api_keys
+ global_api_key = -1
+
+ # global limit on number of logged in users.
+ global_session = -1
+
+ #################################### Alerting ############################
+ [alerting]
+ # Disable alerting engine & UI features
+ enabled = true
+ # Makes it possible to turn off alert rule execution but alerting UI is visible
+ execute_alerts = true
+
+ #################################### Internal Grafana Metrics ############
+ # Metrics available at HTTP API Url /api/metrics
+ [metrics]
+ enabled = true
+ interval_seconds = 10
+
+ # Send internal Grafana metrics to graphite
+ [metrics.graphite]
+ # Enable by setting the address setting (ex localhost:2003)
+ address =
+ prefix = prod.grafana.%(instance_name)s.
+
+ [grafana_net]
+ url = https://grafana.com
+
+ [grafana_com]
+ url = https://grafana.com
+
+ #################################### Distributed tracing ############
+ [tracing.jaeger]
+ # jaeger destination (ex localhost:6831)
+ address =
+ # tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2)
+ always_included_tag =
+ # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
+ sampler_type = const
+ # jaeger samplerconfig param
+ # for "const" sampler, 0 or 1 for always false/true respectively
+ # for "probabilistic" sampler, a probability between 0 and 1
+ # for "rateLimiting" sampler, the number of spans per second
+ # for "remote" sampler, param is the same as for "probabilistic"
+ # and indicates the initial sampling rate before the actual one
+ # is received from the mothership
+ sampler_param = 1
+
+ #################################### External Image Storage ##############
+ [external_image_storage]
+ # You can choose between (s3, webdav, gcs)
+ provider =
+
+ [external_image_storage.s3]
+ bucket_url =
+ bucket =
+ region =
+ path =
+ access_key =
+ secret_key =
+
+ [external_image_storage.webdav]
+ url =
+ username =
+ password =
+ public_url =
+
+ [external_image_storage.gcs]
+ key_file =
+ bucket =
diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml
new file mode 100644
index 000000000..bc7b4b286
--- /dev/null
+++ b/roles/openshift_grafana/files/grafana-ocp.yml
@@ -0,0 +1,76 @@
+---
+kind: Template
+apiVersion: v1
+metadata:
+ name: grafana-ocp
+ annotations:
+ "openshift.io/display-name": Grafana ocp
+ description: |
+ Grafana server with patched Prometheus datasource.
+ iconClass: icon-cogs
+ tags: "metrics,monitoring,grafana,prometheus"
+parameters:
+- description: External URL for the grafana route
+ name: ROUTE_URL
+ value: ""
+- description: The namespace to instantiate heapster under. Defaults to 'grafana'.
+ name: NAMESPACE
+ value: grafana
+objects:
+- apiVersion: route.openshift.io/v1
+ kind: Route
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ host: "${ROUTE_URL}"
+ to:
+ name: grafana-ocp
+- apiVersion: v1
+ kind: Service
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ selector:
+ name: grafana-ocp
+ ports:
+ - port: 8082
+ protocol: TCP
+ targetPort: grafana-http
+- apiVersion: v1
+ kind: ReplicationController
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ selector:
+ name: grafana-ocp
+ replicas: 1
+ template:
+ version: v1
+ metadata:
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ volumes:
+ - name: data
+ emptyDir: {}
+ containers:
+ - image: "mrsiano/grafana-ocp:latest"
+ name: grafana-ocp
+ ports:
+ - name: grafana-http
+ containerPort: 3000
+ volumeMounts:
+ - name: data
+ mountPath: "/root/go/src/github.com/grafana/grafana/data"
+ command:
+ - "./bin/grafana-server"
diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json
new file mode 100644
index 000000000..f59ca997f
--- /dev/null
+++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json
@@ -0,0 +1,5138 @@
+{
+ "dashboard": {
+ "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.",
+ "editable": true,
+ "gnetId": 315,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "rows": [
+ {
+ "collapse": false,
+ "height": "200px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "height": "200px",
+ "id": 32,
+ "legend": {
+ "alignAsTable": false,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "instant": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "Received",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "Sent",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network I/O pressure",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Network I/O pressure",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster memory usage",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster CPU usage ",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster filesystem usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 9,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "20%",
+ "prefix": "",
+ "prefixFontSize": "20%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": " cores",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": " cores",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 13,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Total usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 33,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) ",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "overall cpu usage",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cluster CPU Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods CPU usage ",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "% Usage",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 24,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers Cores Usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 23,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System services CPU usage ",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "System services CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 411,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes Memory usage ",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 25,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 26,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ systemd_service_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "B",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System services memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "System services memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 27,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "500px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 28,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 30,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "network",
+ "refId": "D",
+ "step": 1
+ },
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "network",
+ "refId": "C",
+ "step": 1
+ },
+ {
+ "expr": "sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "network",
+ "refId": "E",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "network",
+ "refId": "F",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers network I/O ",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 277,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 16,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> {{ pod_name }}",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- {{ pod_name }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods network I/O ",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "500px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 29,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "instant": true,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> {{ id }}",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- {{ id }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes network I/O ",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 35,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total) by (phase,reason)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ phase }} | {{ reason }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_build_total",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 54,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 55,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of build that have been waiting at least 10 minutes (600 seconds) to start.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 56,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total{phase=\"Failed\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of failed builds, regardless of the failure reason.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 57,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ instance }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of failed builds because of problems retrieving source from the associated Git repository.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 58,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": false,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total{phase=\"Complete\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of successfully completed builds.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 0,
+ "id": 59,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_build_total{phase=\"Failed\"} offset 5m",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ reason }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift Builds",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_sdn_pod_setup_latency_sum)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_setup_latency_sum",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 41,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_teardown_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 50,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Top 10 pods doing the most receive network traffic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 37,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_pod_ips",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }} | {{ role }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_ips",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 39,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 42,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_arp_cache_entries",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ role }} | {{ instance }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_arp_cache_entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 40,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_arp_cache_entries",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_arp_cache_entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift SDN",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ role }} | {{ instance }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "kubelet_pleg_relist",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 51,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 52,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "kubelet_docker_operations_timeout",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 53,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "kubelet_docker_operations_errors",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns a running count (not a rate) of docker operations that have failed since the kubelet was started.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Kubelet",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 46,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(scrape_samples_scraped[2m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ kubernetes_name }} | {{ instance }} ",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "scrape_samples_scraped",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 68,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU per instance of Prometheus container.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Prometheus",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 48,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ resource }} || {{ verb }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of mutating API requests being made to the control plane.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 49,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ resource }} || {{ pod }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of non-mutating API requests being made to the control plane.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 74,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "endpoint_queue_latency",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": " quantile {{ quantile }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "endpoint_queue_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "API Server",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 61,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "etcd_disk_wal_fsync_duration_seconds_count",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "etcd_disk_wal_fsync_duration_seconds_count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "etcd",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 62,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(changes(container_start_time_seconds[10m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "The number of containers that start or restart over the last ten minutes.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Changes in your cluster",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 63,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(machine_cpu_cores)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of cores in the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 64,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of consumed cores.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 65,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumed per node in the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 66,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumption per system service or container on the infrastructure nodes.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 67,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumed per namespace on the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 47,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Percentage of total cluster CPU in use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 69,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Percentage of total cluster memory in use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 70,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Aggregate CPU usage (seconds total) of etcd+docker",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "System and container CPU",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 71,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "volumes_queue_latency",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "volumes_queue_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 72,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ request }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "cloudprovider_aws_api_request_duration_seconds_count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 73,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "storage_operation_duration_seconds_sum",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift Volumes",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes",
+ "openshift"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PR}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "Node",
+ "options": [],
+ "query": "label_values(kubernetes_io_hostname)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "1s",
+ "2m",
+ "20s",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "openshift cluster monitoring",
+ "version": 6
+ }
+}
diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml
new file mode 100644
index 000000000..8dea6f197
--- /dev/null
+++ b/roles/openshift_grafana/meta/main.yml
@@ -0,0 +1,13 @@
+---
+galaxy_info:
+ author: Eldad Marciano
+ description: Setup grafana pod
+ company: Red Hat, Inc.
+ license: Apache License, Version 2.0
+ min_ansible_version: 2.3
+ platforms:
+ - name: EL
+ versions:
+ - 7
+ categories:
+ - metrics
diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml
new file mode 100644
index 000000000..9d3c741ee
--- /dev/null
+++ b/roles/openshift_grafana/tasks/gf-permissions.yml
@@ -0,0 +1,12 @@
+---
+- name: Create gf user on htpasswd
+ command: htpasswd -c /etc/origin/master/htpasswd gfadmin
+
+- name: Make sure master config use HTPasswdPasswordIdentityProvider
+ command: "sed -ie 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml"
+
+- name: Grant permission for gfuser
+ command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin
+
+- name: Restart mater api
+ command: systemctl restart atomic-openshift-master-api.service
diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml
new file mode 100644
index 000000000..6a06d40a9
--- /dev/null
+++ b/roles/openshift_grafana/tasks/main.yml
@@ -0,0 +1,122 @@
+---
+- name: Create grafana namespace
+ oc_project:
+ state: present
+ name: grafana
+
+- name: Configure Grafana Permissions
+ include_tasks: tasks/gf-permissions.yml
+ when: gf_oauth | default(false) | bool == true
+
+# TODO: we should grab this yaml file from openshift/origin
+- name: Templatize grafana yaml
+ template: src=grafana-ocp.yaml dest=/tmp/grafana-ocp.yaml
+ register:
+ cl_file: /tmp/grafana-ocp.yaml
+ when: gf_oauth | default(false) | bool == false
+
+# TODO: we should grab this yaml file from openshift/origin
+- name: Templatize grafana yaml
+ template: src=grafana-ocp-oauth.yaml dest=/tmp/grafana-ocp-oauth.yaml
+ register:
+ cl_file: /tmp/grafana-ocp-oauth.yaml
+ when: gf_oauth | default(false) | bool == true
+
+- name: Process the grafana file
+ oc_process:
+ namespace: grafana
+ template_name: "{{ cl_file }}"
+ create: True
+ when: gf_oauth | default(false) | bool == true
+
+- name: Wait to grafana be running
+ command: oc rollout status deployment/grafana-ocp
+
+- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }}
+ oc_adm_policy_user:
+ user: grafana-ocp
+ resource_kind: cluster-role
+ resource_name: view
+ state: present
+ role_namespace: "{{ gf_prometheus_namespace }}"
+
+- name: Get grafana route
+ oc_obj:
+ kind: route
+ name: grafana
+ namespace: grafana
+ register: route
+
+- name: Get prometheus route
+ oc_obj:
+ kind: route
+ name: prometheus
+ namespace: "{{ gf_prometheus_namespace }}"
+ register: route
+
+- name: Get the prometheus SA
+ oc_serviceaccount_secret:
+ state: list
+ service_account: prometheus
+ namespace: "{{ gf_prometheus_namespace }}"
+ register: sa
+
+- name: Get the management SA bearer token
+ set_fact:
+ management_token: "{{ sa.results | oo_filter_sa_secrets }}"
+
+- name: Ensure the SA bearer token value is read
+ oc_secret:
+ state: list
+ name: "{{ management_token }}"
+ namespace: "{{ gf_prometheus_namespace }}"
+ no_log: True
+ register: sa_secret
+
+- name: Get the SA bearer token for prometheus
+ set_fact:
+ token: "{{ sa_secret.results.encoded.token }}"
+
+- name: Convert to json
+ var:
+ ds_json: "{{ gf_body_tmp }} | to_json }}"
+
+- name: Set protocol type
+ var:
+ protocol: "{{ 'https' if {{ gf_oauth }} == true else 'http' }}"
+
+- name: Add gf datasrouce
+ uri:
+ url: "{{ protocol }}://{{ route }}/api/datasources"
+ user: admin
+ password: admin
+ method: POST
+ body: "{{ ds_json | regex_replace('grafana_name', {{ gf_datasource_name }}) | regex_replace('prometheus_url', 'https://'{{ prometheus }} ) | regex_replace('satoken', {{ token }}) }}"
+ headers:
+ Content-Type: "Content-Type: application/json"
+ register: add_ds
+
+- name: Regex setup ds name
+ replace:
+ path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}"
+ regexp: '${DS_PR}'
+ replace: '{{ gf_datasource_name }}'
+ backup: yes
+
+- name: Add new dashboard
+ uri:
+ url: "{{ protocol }}://{{ route }}/api/dashboards/db"
+ user: admin
+ password: admin
+ method: POST
+ body: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}"
+ headers:
+ Content-Type: "Content-Type: application/json"
+ register: add_ds
+
+- name: Regex json tear down
+ replace:
+ path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}"
+ regexp: '${DS_PR}'
+ replace: '{{ gf_datasource_name }}'
+ backup: yes
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 83e551b5d..b9c41d1b4 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -5,6 +5,7 @@ Health checks for OpenShift clusters.
import json
import operator
import os
+import re
import time
import collections
@@ -309,28 +310,38 @@ class OpenShiftCheck(object):
name_list = name_list.split(',')
return [name.strip() for name in name_list if name.strip()]
- @staticmethod
- def get_major_minor_version(openshift_image_tag):
+ def get_major_minor_version(self, openshift_image_tag=None):
"""Parse and return the deployed version of OpenShift as a tuple."""
- if openshift_image_tag and openshift_image_tag[0] == 'v':
- openshift_image_tag = openshift_image_tag[1:]
- # map major release versions across releases
- # to a common major version
- openshift_major_release_version = {
- "1": "3",
- }
+ version = openshift_image_tag or self.get_var("openshift_image_tag")
+ components = [int(component) for component in re.findall(r'\d+', version)]
- components = openshift_image_tag.split(".")
- if not components or len(components) < 2:
+ if len(components) < 2:
msg = "An invalid version of OpenShift was found for this host: {}"
- raise OpenShiftCheckException(msg.format(openshift_image_tag))
+ raise OpenShiftCheckException(msg.format(version))
+
+ # map major release version across releases to OCP major version
+ components[0] = {1: 3}.get(components[0], components[0])
+
+ return tuple(int(x) for x in components[:2])
+
+ def get_required_version(self, name, version_map):
+ """Return the correct required version(s) for the current (or nearest) OpenShift version."""
+ openshift_version = self.get_major_minor_version()
+
+ earliest = min(version_map)
+ latest = max(version_map)
+ if openshift_version < earliest:
+ return version_map[earliest]
+ if openshift_version > latest:
+ return version_map[latest]
- if components[0] in openshift_major_release_version:
- components[0] = openshift_major_release_version[components[0]]
+ required_version = version_map.get(openshift_version)
+ if not required_version:
+ msg = "There is no recommended version of {} for the current version of OpenShift ({})"
+ raise OpenShiftCheckException(msg.format(name, ".".join(str(comp) for comp in openshift_version)))
- components = tuple(int(x) for x in components[:2])
- return components
+ return required_version
def find_ansible_mount(self, path):
"""Return the mount point for path from ansible_mounts."""
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 7afb8f730..ac6ffbbad 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -56,7 +56,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
# ordered list of registries (according to inventory vars) that docker will try for unscoped images
regs = self.ensure_list("openshift_docker_additional_registries")
# currently one of these registries is added whether the user wants it or not.
- deployment_type = self.get_var("openshift_deployment_type")
+ deployment_type = self.get_var("openshift_deployment_type", default="")
if deployment_type == "origin" and "docker.io" not in regs:
regs.append("docker.io")
elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs:
diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
index 986a01f38..7f8c6ebdc 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
@@ -170,7 +170,7 @@ class Elasticsearch(LoggingCheck):
"""
errors = []
for pod_name in pods_by_name.keys():
- df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
+ df_cmd = '-c elasticsearch exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
disk_output = self.exec_oc(df_cmd, [], save_as_name='get_pv_diskspace.json')
lines = disk_output.splitlines()
# expecting one header looking like 'IUse% Use%' and one body line
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
index 3b1cf8baa..16ec3a7f6 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -5,12 +5,11 @@ Module for performing checks on a Kibana logging deployment
import json
import ssl
-try:
- from urllib2 import HTTPError, URLError
- import urllib2
-except ImportError:
- from urllib.error import HTTPError, URLError
- import urllib.request as urllib2
+# pylint can't find the package when its installed in virtualenv
+# pylint: disable=import-error,no-name-in-module
+from ansible.module_utils.six.moves.urllib import request
+# pylint: disable=import-error,no-name-in-module
+from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError
from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
@@ -65,7 +64,7 @@ class Kibana(LoggingCheck):
# Verify that the url is returning a valid response
try:
# We only care if the url connects and responds
- return_code = urllib2.urlopen(url, context=ctx).getcode()
+ return_code = request.urlopen(url, context=ctx).getcode()
except HTTPError as httperr:
return httperr.reason
except URLError as urlerr:
diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py
index 0cad19842..58a2692bd 100644
--- a/roles/openshift_health_checker/openshift_checks/ovs_version.py
+++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py
@@ -3,7 +3,7 @@ Ansible module for determining if an installed version of Open vSwitch is incomp
currently installed version of OpenShift.
"""
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+from openshift_checks import OpenShiftCheck
from openshift_checks.mixins import NotContainerizedMixin
@@ -16,10 +16,12 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
tags = ["health"]
openshift_to_ovs_version = {
- "3.7": ["2.6", "2.7", "2.8"],
- "3.6": ["2.6", "2.7", "2.8"],
- "3.5": ["2.6", "2.7"],
- "3.4": "2.4",
+ (3, 4): "2.4",
+ (3, 5): ["2.6", "2.7"],
+ (3, 6): ["2.6", "2.7", "2.8"],
+ (3, 7): ["2.6", "2.7", "2.8"],
+ (3, 8): ["2.6", "2.7", "2.8"],
+ (3, 9): ["2.6", "2.7", "2.8"],
}
def is_active(self):
@@ -40,16 +42,5 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
return self.execute_module("rpm_version", args)
def get_required_ovs_version(self):
- """Return the correct Open vSwitch version for the current OpenShift version"""
- openshift_version_tuple = self.get_major_minor_version(self.get_var("openshift_image_tag"))
-
- if openshift_version_tuple < (3, 5):
- return self.openshift_to_ovs_version["3.4"]
-
- openshift_version = ".".join(str(x) for x in openshift_version_tuple)
- ovs_version = self.openshift_to_ovs_version.get(openshift_version)
- if ovs_version:
- return self.openshift_to_ovs_version[openshift_version]
-
- msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}"
- raise OpenShiftCheckException(msg.format(openshift_version))
+ """Return the correct Open vSwitch version(s) for the current OpenShift version."""
+ return self.get_required_version("Open vSwitch", self.openshift_to_ovs_version)
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index f3a628e28..28aee8b35 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -1,8 +1,6 @@
"""Check that available RPM packages match the required versions."""
-import re
-
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+from openshift_checks import OpenShiftCheck
from openshift_checks.mixins import NotContainerizedMixin
@@ -18,6 +16,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
(3, 5): ["2.6", "2.7"],
(3, 6): ["2.6", "2.7", "2.8"],
(3, 7): ["2.6", "2.7", "2.8"],
+ (3, 8): ["2.6", "2.7", "2.8"],
+ (3, 9): ["2.6", "2.7", "2.8"],
}
openshift_to_docker_version = {
@@ -27,11 +27,9 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
(3, 4): "1.12",
(3, 5): "1.12",
(3, 6): "1.12",
- }
-
- # map major OpenShift release versions across releases to a common major version
- map_major_release_version = {
- 1: 3,
+ (3, 7): "1.12",
+ (3, 8): "1.12",
+ (3, 9): ["1.12", "1.13"],
}
def is_active(self):
@@ -83,48 +81,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
def get_required_ovs_version(self):
"""Return the correct Open vSwitch version(s) for the current OpenShift version."""
- openshift_version = self.get_openshift_version_tuple()
-
- earliest = min(self.openshift_to_ovs_version)
- latest = max(self.openshift_to_ovs_version)
- if openshift_version < earliest:
- return self.openshift_to_ovs_version[earliest]
- if openshift_version > latest:
- return self.openshift_to_ovs_version[latest]
-
- ovs_version = self.openshift_to_ovs_version.get(openshift_version)
- if not ovs_version:
- msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}"
- raise OpenShiftCheckException(msg.format(".".join(str(comp) for comp in openshift_version)))
-
- return ovs_version
+ return self.get_required_version("Open vSwitch", self.openshift_to_ovs_version)
def get_required_docker_version(self):
"""Return the correct Docker version(s) for the current OpenShift version."""
- openshift_version = self.get_openshift_version_tuple()
-
- earliest = min(self.openshift_to_docker_version)
- latest = max(self.openshift_to_docker_version)
- if openshift_version < earliest:
- return self.openshift_to_docker_version[earliest]
- if openshift_version > latest:
- return self.openshift_to_docker_version[latest]
-
- docker_version = self.openshift_to_docker_version.get(openshift_version)
- if not docker_version:
- msg = "There is no recommended version of Docker for the current version of OpenShift: {}"
- raise OpenShiftCheckException(msg.format(".".join(str(comp) for comp in openshift_version)))
-
- return docker_version
-
- def get_openshift_version_tuple(self):
- """Return received image tag as a normalized (X, Y) minor version tuple."""
- version = self.get_var("openshift_image_tag")
- comps = [int(component) for component in re.findall(r'\d+', version)]
-
- if len(comps) < 2:
- msg = "An invalid version of OpenShift was found for this host: {}"
- raise OpenShiftCheckException(msg.format(version))
-
- comps[0] = self.map_major_release_version.get(comps[0], comps[0])
- return tuple(comps[0:2])
+ return self.get_required_version("Docker", self.openshift_to_docker_version)
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
index 04a5e89c4..750d4b9e9 100644
--- a/roles/openshift_health_checker/test/kibana_test.py
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -1,12 +1,10 @@
import pytest
import json
-try:
- import urllib2
- from urllib2 import HTTPError, URLError
-except ImportError:
- from urllib.error import HTTPError, URLError
- import urllib.request as urllib2
+# pylint can't find the package when its installed in virtualenv
+from ansible.module_utils.six.moves.urllib import request # pylint: disable=import-error
+# pylint: disable=import-error
+from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError
from openshift_checks.logging.kibana import Kibana, OpenShiftCheckException
@@ -202,7 +200,7 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch):
if type(lib_result) is int:
return _http_return(lib_result)
raise lib_result
- monkeypatch.setattr(urllib2, 'urlopen', urlopen)
+ monkeypatch.setattr(request, 'urlopen', urlopen)
check = Kibana()
check._get_kibana_url = lambda: 'url'
diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py
index 0238f49d5..80c7a0541 100644
--- a/roles/openshift_health_checker/test/ovs_version_test.py
+++ b/roles/openshift_health_checker/test/ovs_version_test.py
@@ -1,26 +1,7 @@
import pytest
-from openshift_checks.ovs_version import OvsVersion, OpenShiftCheckException
-
-
-def test_openshift_version_not_supported():
- def execute_module(*_):
- return {}
-
- openshift_release = '111.7.0'
-
- task_vars = dict(
- openshift=dict(common=dict()),
- openshift_release=openshift_release,
- openshift_image_tag='v' + openshift_release,
- openshift_deployment_type='origin',
- openshift_service_type='origin'
- )
-
- with pytest.raises(OpenShiftCheckException) as excinfo:
- OvsVersion(execute_module, task_vars).run()
-
- assert "no recommended version of Open vSwitch" in str(excinfo.value)
+from openshift_checks.ovs_version import OvsVersion
+from openshift_checks import OpenShiftCheckException
def test_invalid_openshift_release_format():
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index d2916f617..868b4bd12 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -1,6 +1,7 @@
import pytest
-from openshift_checks.package_version import PackageVersion, OpenShiftCheckException
+from openshift_checks.package_version import PackageVersion
+from openshift_checks import OpenShiftCheckException
def task_vars_for(openshift_release, deployment_type):
@@ -18,7 +19,7 @@ def task_vars_for(openshift_release, deployment_type):
def test_openshift_version_not_supported():
check = PackageVersion(None, task_vars_for("1.2.3", 'origin'))
- check.get_openshift_version_tuple = lambda: (3, 4, 1) # won't be in the dict
+ check.get_major_minor_version = lambda: (3, 4, 1) # won't be in the dict
with pytest.raises(OpenShiftCheckException) as excinfo:
check.get_required_ovs_version()
diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml
index ff62b6136..f82e55b98 100644
--- a/roles/openshift_logging/tasks/install_logging.yaml
+++ b/roles/openshift_logging/tasks/install_logging.yaml
@@ -87,7 +87,7 @@
openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}"
openshift_logging_elasticsearch_nodeselector: "{{ openshift_logging_es_nodeselector if outer_item.0.nodeSelector | default(None) is none else outer_item.0.nodeSelector }}"
openshift_logging_elasticsearch_storage_group: "{{ [openshift_logging_es_storage_group] if outer_item.0.storageGroups | default([]) | length == 0 else outer_item.0.storageGroups }}"
_es_containers: "{{ outer_item.0.containers}}"
@@ -114,7 +114,7 @@
openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}"
with_sequence: count={{ openshift_logging_es_cluster_size | int - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count }}
loop_control:
@@ -151,7 +151,7 @@
openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}"
openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}"
openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}"
openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}"
openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}"
@@ -193,7 +193,7 @@
openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}"
openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}"
openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}"
openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}"
openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}"
diff --git a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml
index 9182bddb2..16de6f252 100644
--- a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml
@@ -1,6 +1,6 @@
---
- command: >
- oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
- name: "Getting ES version for logging-es cluster"
@@ -10,7 +10,7 @@
when: _cluster_pods.stdout_lines | count > 0
- command: >
- oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _ops_cluster_pods
- name: "Getting ES version for logging-es-ops cluster"
diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
index d55beec86..6bce13d1d 100644
--- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
@@ -19,7 +19,7 @@
## get all pods for the cluster
- command: >
- oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
@@ -64,7 +64,7 @@
## we may need a new first pod to run against -- fetch them all again
- command: >
- oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml
index 8ccfb7192..057963c1a 100644
--- a/roles/openshift_metrics/tasks/oc_apply.yaml
+++ b/roles/openshift_metrics/tasks/oc_apply.yaml
@@ -16,7 +16,9 @@
apply -f {{ file_name }}
-n {{namespace}}
register: generation_apply
- failed_when: "'error' in generation_apply.stderr"
+ failed_when:
+ - "'error' in generation_apply.stderr"
+ - "generation_apply.rc != 0"
changed_when: no
- name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}}
@@ -28,5 +30,7 @@
register: version_changed
vars:
init_version: "{{ (generation_init is defined) | ternary(generation_init.stdout, '0') }}"
- failed_when: "'error' in version_changed.stderr"
+ failed_when:
+ - "'error' in version_changed.stderr"
+ - "version_changed.rc != 0"
changed_when: version_changed.stdout | int > init_version | int
diff --git a/roles/openshift_node/defaults/main.yml b/roles/openshift_node/defaults/main.yml
index c1fab4382..0b10413c5 100644
--- a/roles/openshift_node/defaults/main.yml
+++ b/roles/openshift_node/defaults/main.yml
@@ -48,6 +48,12 @@ openshift_node_kubelet_args_dict:
cloud-config:
- "{{ openshift_config_base ~ '/cloudprovider/gce.conf' }}"
node-labels: "{{ l_node_kubelet_node_labels }}"
+ azure:
+ cloud-provider:
+ - azure
+ cloud-config:
+ - "{{ openshift_config_base ~ '/cloudprovider/azure.conf' }}"
+ node-labels: "{{ l_node_kubelet_node_labels }}"
undefined:
node-labels: "{{ l_node_kubelet_node_labels }}"
diff --git a/roles/openshift_persistent_volumes/tasks/pv.yml b/roles/openshift_persistent_volumes/tasks/pv.yml
index ef9ab7f5f..865269b7a 100644
--- a/roles/openshift_persistent_volumes/tasks/pv.yml
+++ b/roles/openshift_persistent_volumes/tasks/pv.yml
@@ -13,5 +13,5 @@
--config={{ mktemp.stdout }}/admin.kubeconfig
register: pv_create_output
when: persistent_volumes | length > 0
- failed_when: ('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout)
+ failed_when: "('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout) and pv_create_output.rc != 0"
changed_when: ('created' in pv_create_output.stdout)
diff --git a/roles/openshift_persistent_volumes/tasks/pvc.yml b/roles/openshift_persistent_volumes/tasks/pvc.yml
index 2c5519192..6c12d128c 100644
--- a/roles/openshift_persistent_volumes/tasks/pvc.yml
+++ b/roles/openshift_persistent_volumes/tasks/pvc.yml
@@ -13,5 +13,5 @@
--config={{ mktemp.stdout }}/admin.kubeconfig
register: pvc_create_output
when: persistent_volume_claims | length > 0
- failed_when: ('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout)
+ failed_when: "('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout) and pvc_create_output.rc != 0"
changed_when: ('created' in pvc_create_output.stdout)
diff --git a/roles/openshift_provisioners/tasks/oc_apply.yaml b/roles/openshift_provisioners/tasks/oc_apply.yaml
index a4ce53eae..239e1f1cc 100644
--- a/roles/openshift_provisioners/tasks/oc_apply.yaml
+++ b/roles/openshift_provisioners/tasks/oc_apply.yaml
@@ -15,7 +15,9 @@
apply -f {{ file_name }}
-n {{ namespace }}
register: generation_apply
- failed_when: "'error' in generation_apply.stderr"
+ failed_when:
+ - "'error' in generation_apply.stderr"
+ - "generation_apply.rc != 0"
changed_when: no
- name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}}
@@ -36,7 +38,9 @@
delete -f {{ file_name }}
-n {{ namespace }}
register: generation_delete
- failed_when: "'error' in generation_delete.stderr"
+ failed_when:
+ - "'error' in generation_delete.stderr"
+ - "generation_delete.rc != 0"
changed_when: generation_delete.rc == 0
when: generation_apply.rc != 0
@@ -46,6 +50,8 @@
apply -f {{ file_name }}
-n {{ namespace }}
register: generation_apply
- failed_when: "'error' in generation_apply.stderr"
+ failed_when:
+ - "'error' in generation_apply.stderr"
+ - "generation_apply.rc != 0"
changed_when: generation_apply.rc == 0
when: generation_apply.rc != 0
diff --git a/roles/openshift_web_console/vars/default_images.yml b/roles/openshift_web_console/vars/default_images.yml
index 7adb8a0d0..42d331ac5 100644
--- a/roles/openshift_web_console/vars/default_images.yml
+++ b/roles/openshift_web_console/vars/default_images.yml
@@ -1,4 +1,4 @@
---
-__openshift_web_console_prefix: "docker.io/openshift/"
+__openshift_web_console_prefix: "docker.io/openshift/origin-"
__openshift_web_console_version: "latest"
-__openshift_web_console_image_name: "origin-web-console"
+__openshift_web_console_image_name: "web-console"
diff --git a/roles/openshift_web_console/vars/openshift-enterprise.yml b/roles/openshift_web_console/vars/openshift-enterprise.yml
index 721ac1d27..375c22067 100644
--- a/roles/openshift_web_console/vars/openshift-enterprise.yml
+++ b/roles/openshift_web_console/vars/openshift-enterprise.yml
@@ -1,4 +1,4 @@
---
-__openshift_web_console_prefix: "registry.access.redhat.com/openshift3/"
+__openshift_web_console_prefix: "registry.access.redhat.com/openshift3/ose-"
__openshift_web_console_version: "v3.9"
-__openshift_web_console_image_name: "ose-web-console"
+__openshift_web_console_image_name: "web-console"
diff --git a/roles/template_service_broker/vars/default_images.yml b/roles/template_service_broker/vars/default_images.yml
index 662d65d9f..dc164a4db 100644
--- a/roles/template_service_broker/vars/default_images.yml
+++ b/roles/template_service_broker/vars/default_images.yml
@@ -1,4 +1,4 @@
---
-__template_service_broker_prefix: "docker.io/openshift/"
+__template_service_broker_prefix: "docker.io/openshift/origin-"
__template_service_broker_version: "latest"
-__template_service_broker_image_name: "origin-template-service-broker"
+__template_service_broker_image_name: "template-service-broker"
diff --git a/roles/template_service_broker/vars/openshift-enterprise.yml b/roles/template_service_broker/vars/openshift-enterprise.yml
index 16a08e72f..b65b97691 100644
--- a/roles/template_service_broker/vars/openshift-enterprise.yml
+++ b/roles/template_service_broker/vars/openshift-enterprise.yml
@@ -1,4 +1,4 @@
---
-__template_service_broker_prefix: "registry.access.redhat.com/openshift3/"
+__template_service_broker_prefix: "registry.access.redhat.com/openshift3/ose-"
__template_service_broker_version: "v3.7"
-__template_service_broker_image_name: "ose-template-service-broker"
+__template_service_broker_image_name: "template-service-broker"