From 36a7c5bf5c69511020c516a33ca6b3e57aff485d Mon Sep 17 00:00:00 2001
From: Scott Dodson
Date: Thu, 6 Jul 2017 11:28:26 -0400
Subject: Add retries to node restart handlers

---
 roles/openshift_node/handlers/main.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'roles/openshift_node')

diff --git a/roles/openshift_node/handlers/main.yml b/roles/openshift_node/handlers/main.yml
index a6bd12d4e..0d4af9f53 100644
--- a/roles/openshift_node/handlers/main.yml
+++ b/roles/openshift_node/handlers/main.yml
@@ -15,7 +15,13 @@
   systemd:
     name: "{{ openshift.common.service_type }}-node"
     state: restarted
-  when: (not skip_node_svc_handlers | default(False) | bool) and not (node_service_status_changed | default(false) | bool)
+  register: l_openshift_node_restart_node_result
+  until: not l_openshift_node_restart_node_result | failed
+  retries: 3
+  delay: 30
+  when:
+  - (not skip_node_svc_handlers | default(False) | bool)
+  - not (node_service_status_changed | default(false) | bool)
 
 - name: reload sysctl.conf
   command: /sbin/sysctl -p
-- 
cgit v1.2.3

From a05fbeb6135864fedfb648644b06702ee1afea68 Mon Sep 17 00:00:00 2001
From: Scott Dodson
Date: Mon, 10 Jul 2017 13:47:51 -0400
Subject: Wrap additional service changes in retries

---
 roles/openshift_node/handlers/main.yml | 5 +++++
 roles/openshift_node/tasks/main.yml    | 3 +++
 2 files changed, 8 insertions(+)

(limited to 'roles/openshift_node')

diff --git a/roles/openshift_node/handlers/main.yml b/roles/openshift_node/handlers/main.yml
index 0d4af9f53..6b38da7f8 100644
--- a/roles/openshift_node/handlers/main.yml
+++ b/roles/openshift_node/handlers/main.yml
@@ -4,9 +4,14 @@
     name: openvswitch
     state: restarted
   when: (not skip_node_svc_handlers | default(False) | bool) and not (ovs_service_status_changed | default(false) | bool) and openshift.common.use_openshift_sdn | bool
+  register: l_openshift_node_stop_openvswitch_result
+  until: not l_openshift_node_stop_openvswitch_result | failed
+  retries: 3
+  delay: 30
   notify:
   - restart openvswitch pause
 
+
 - name: restart openvswitch pause
   pause: seconds=15
   when: (not skip_node_svc_handlers | default(False) | bool) and openshift.common.is_containerized | bool
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index 573051504..dd47d643b 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -120,6 +120,9 @@
     state: started
   when: openshift.common.is_containerized | bool and openshift.common.use_openshift_sdn | bool
   register: ovs_start_result
+  until: not ovs_start_result | failed
+  retries: 3
+  delay: 30
 
 - set_fact:
     ovs_service_status_changed: "{{ ovs_start_result | changed }}"
-- 
cgit v1.2.3

From 4c8f1c1269aa8fa527816ad63a295f8863d8a6f8 Mon Sep 17 00:00:00 2001
From: Scott Dodson
Date: Fri, 14 Jul 2017 16:01:13 -0400
Subject: daemon_reload on node and ovs start

At least in my smoke testing of a containerized install i had to manually reload systemd
---
 roles/openshift_node/tasks/main.yml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'roles/openshift_node')

diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index dd47d643b..05721c882 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -118,6 +118,7 @@
     name: openvswitch.service
     enabled: yes
     state: started
+    daemon_reload: yes
   when: openshift.common.is_containerized | bool and openshift.common.use_openshift_sdn | bool
   register: ovs_start_result
   until: not ovs_start_result | failed
@@ -220,6 +221,7 @@
     name: "{{ openshift.common.service_type }}-node"
     enabled: yes
     state: started
+    daemon_reload: yes
   register: node_start_result
   until: not node_start_result | failed
   retries: 1
-- 
cgit v1.2.3

From 44fb8d5d9825bd5a708062dfe371763566d014e7 Mon Sep 17 00:00:00 2001
From: Scott Dodson
Date: Tue, 18 Jul 2017 08:19:27 -0400
Subject: Dump some logs

---
 roles/openshift_node/tasks/main.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'roles/openshift_node')

diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index 05721c882..879f6c207 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -216,6 +216,7 @@
     state: started
   when: openshift.common.is_containerized | bool
 
+
 - name: Start and enable node
   systemd:
     name: "{{ openshift.common.service_type }}-node"
@@ -226,6 +227,16 @@
   until: not node_start_result | failed
   retries: 1
   delay: 30
+  ignore_errors: true
+
+- name: Dump logs from node service if it failed
+  command: journalctl --no-pager -n 100 {{ openshift.common.service_type }}-node
+  when: node_start_result | failed
+
+- name: Abort if node failed to start
+  fail:
+    msg: Node failed to start please inspect the logs and try again
+  when: node_start_result | failed
 
 - set_fact:
     node_service_status_changed: "{{ node_start_result | changed }}"
-- 
cgit v1.2.3