summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/etcd_traffic.py')
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_traffic.py44
1 files changed, 44 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
new file mode 100644
index 000000000..b4c8957e9
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
@@ -0,0 +1,44 @@
+"""Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
+
+from openshift_checks import OpenShiftCheck
+
+
+class EtcdTraffic(OpenShiftCheck):
+ """Check if host is being affected by an increase in etcd traffic."""
+
+ name = "etcd_traffic"
+ tags = ["health", "etcd"]
+
+ def is_active(self):
+ """Skip hosts that do not have etcd in their group names."""
+ group_names = self.get_var("group_names", default=[])
+ valid_group_names = "etcd" in group_names
+
+ version = self.get_major_minor_version(self.get_var("openshift_image_tag"))
+ valid_version = version in ((3, 4), (3, 5))
+
+ return super(EtcdTraffic, self).is_active() and valid_group_names and valid_version
+
+ def run(self):
+ is_containerized = self.get_var("openshift", "common", "is_containerized")
+ unit = "etcd_container" if is_containerized else "etcd"
+
+ log_matchers = [{
+ "start_regexp": r"Starting Etcd Server",
+ "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
+ "unit": unit
+ }]
+
+ match = self.execute_module("search_journalctl", {"log_matchers": log_matchers})
+
+ if match.get("matched"):
+ msg = ("Higher than normal etcd traffic detected.\n"
+ "OpenShift 3.4 introduced an increase in etcd traffic.\n"
+ "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
+ "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
+ return {"failed": True, "msg": msg}
+
+ if match.get("failed"):
+ return {"failed": True, "msg": "\n".join(match.get("errors"))}
+
+ return {}