From 1960ee8c4db904e7c2d4a9a76d12edf7183894e2 Mon Sep 17 00:00:00 2001 From: Tobias Florek Date: Tue, 14 Jun 2016 15:07:14 +0200 Subject: also volume-mount /etc/sysconfig/docker --- roles/openshift_node/templates/openshift.docker.node.dep.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles/openshift_node') diff --git a/roles/openshift_node/templates/openshift.docker.node.dep.service b/roles/openshift_node/templates/openshift.docker.node.dep.service index f66a78479..0fb34cffd 100644 --- a/roles/openshift_node/templates/openshift.docker.node.dep.service +++ b/roles/openshift_node/templates/openshift.docker.node.dep.service @@ -6,6 +6,6 @@ Before={{ openshift.common.service_type }}-node.service [Service] -ExecStart=/bin/bash -c "if [[ -f /usr/bin/docker-current ]]; then echo \"DOCKER_ADDTL_BIND_MOUNTS=--volume=/usr/bin/docker-current:/usr/bin/docker-current:ro\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; else echo \"#DOCKER_ADDTL_BIND_MOUNTS=\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; fi" +ExecStart=/bin/bash -c "if [[ -f /usr/bin/docker-current ]]; then echo \"DOCKER_ADDTL_BIND_MOUNTS=--volume=/usr/bin/docker-current:/usr/bin/docker-current:ro --volume=/etc/sysconfig/docker:/etc/sysconfig/docker:ro\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; else echo \"#DOCKER_ADDTL_BIND_MOUNTS=\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; fi" ExecStop= SyslogIdentifier={{ openshift.common.service_type }}-node-dep -- cgit v1.2.3 From a7b4676ee4a5388efc8b801a79c62e5e7627c467 Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Tue, 14 Jun 2016 10:24:59 -0300 Subject: Attempt to fix containerized node start failure with Docker 1.10. It appears that in some situations (can't reliably reproduce yet), node will fail to start. This appears to be related to the node-dep service and possibly its environment file. 
This file is also an EnvironmentFile for the node service, but it's only created by the node-dep service, and it looks like the node service may try to read its environment before the node-dep service has fully started and created the file. Work around this with an explicit service start. --- roles/openshift_node/tasks/main.yml | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'roles/openshift_node') diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 657e99e87..242437a85 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -110,6 +110,10 @@ changed_when: false when: openshift.common.is_containerized | bool +- name: Start and enable node dep + service: name={{ openshift.common.service_type }}-node-dep enabled=yes state=started + when: openshift.common.is_containerized | bool + - name: Start and enable node service: name={{ openshift.common.service_type }}-node enabled=yes state=started register: node_start_result -- cgit v1.2.3 From 02d4d3ebea97df7fb8ae33233ffb0c5fef3c5bda Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Thu, 16 Jun 2016 17:25:02 -0400 Subject: Stop dumping debug output, re-try starting the node once --- roles/openshift_node/tasks/main.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'roles/openshift_node') diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 242437a85..b5393e3cf 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -118,14 +118,10 @@ service: name={{ openshift.common.service_type }}-node enabled=yes state=started register: node_start_result ignore_errors: yes - -- name: Check logs on failure - command: journalctl -xe - register: node_failure - when: node_start_result | failed - -- name: Dump failure information - debug: var=node_failure + +- name: Start and enable node again + service: name={{ openshift.common.service_type }}-node enabled=yes 
state=started + register: node_start_result when: node_start_result | failed - set_fact: -- cgit v1.2.3 From 555ab6db54423fde7440e8ca4178d44a442f32f2 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Fri, 17 Jun 2016 11:59:54 -0400 Subject: Add 30 second pause before retrying to start the node --- roles/openshift_node/tasks/main.yml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'roles/openshift_node') diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index b5393e3cf..6aac0dc21 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -119,6 +119,11 @@ register: node_start_result ignore_errors: yes +- name: Wait 30 seconds for docker initialization whenever node has failed + pause: + seconds: 30 + when: node_start_result | failed + - name: Start and enable node again service: name={{ openshift.common.service_type }}-node enabled=yes state=started register: node_start_result -- cgit v1.2.3