From 1b9c54b63f6d33b0a612e1a35503d2027745888d Mon Sep 17 00:00:00 2001 From: Eric Wolinetz Date: Tue, 5 Dec 2017 17:08:57 -0600 Subject: Removing config trigger for ES DC, updating to use a handler to rollout ES at the end of a deployment, allowing for override with variable --- .../handlers/main.yml | 13 ++++++++ .../tasks/main.yaml | 21 +++++++++++++ .../tasks/restart_cluster.yml | 35 ++++++++++++++++++++++ .../tasks/restart_es_node.yml | 35 ++++++++++++++++++++++ .../templates/es.j2 | 1 + .../openshift_logging_elasticsearch/vars/main.yml | 2 ++ 6 files changed, 107 insertions(+) create mode 100644 roles/openshift_logging_elasticsearch/handlers/main.yml create mode 100644 roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml create mode 100644 roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml (limited to 'roles/openshift_logging_elasticsearch') diff --git a/roles/openshift_logging_elasticsearch/handlers/main.yml b/roles/openshift_logging_elasticsearch/handlers/main.yml new file mode 100644 index 000000000..fa56897d0 --- /dev/null +++ b/roles/openshift_logging_elasticsearch/handlers/main.yml @@ -0,0 +1,13 @@ +--- +- name: "Restarting logging-{{ _cluster_component }} cluster" + listen: "restart elasticsearch" + include_tasks: restart_cluster.yml + with_items: "{{ _restart_logging_components }}" + loop_control: + loop_var: _cluster_component + when: not logging_elasticsearch_rollout_override | bool + +## Stop this from running more than once +- set_fact: + logging_elasticsearch_rollout_override: True + listen: "restart elasticsearch" diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index 8f2050043..5fe683ae5 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -204,7 +204,21 @@ from_file: elasticsearch.yml: "{{ tempdir }}/elasticsearch.yml" logging.yml: "{{ tempdir }}/elasticsearch-logging.yml" + register: es_config_creation + notify: "restart elasticsearch" +- when: es_config_creation.changed | bool + block: + - set_fact: + _restart_logging_components: "{{ _restart_logging_components | default([]) + [es_component] | unique }}" + + - shell: > + oc get dc -l component="{{ es_component }}" -n "{{ openshift_logging_elasticsearch_namespace }}" -o name | cut -d'/' -f2 + register: _es_dcs + + - set_fact: + _restart_logging_nodes: "{{ _restart_logging_nodes | default([]) + [_es_dcs.stdout] | unique }}" + when: _es_dcs.stdout != "" # secret - name: Set ES secret @@ -375,6 +389,13 @@ files: - "{{ tempdir }}/templates/logging-es-dc.yml" delete_after: true + register: es_dc_creation + notify: "restart elasticsearch" + +- set_fact: + _restart_logging_components: "{{ _restart_logging_components | default([]) + [es_component] | unique }}" + _restart_logging_nodes: "{{ _restart_logging_nodes | default([]) + [es_deploy_name] | unique }}" + when: es_dc_creation.changed | bool - name: Retrieving the cert to use when generating secrets for the {{ es_component }} component slurp: diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml new file mode 100644 index 000000000..4a32453e3 --- /dev/null +++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml @@ -0,0 +1,35 @@ +--- +## get all pods for the cluster +- command: > + oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + register: _cluster_pods + +- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster" + command: > + oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }' + register: _disable_output + changed_when: "'\"acknowledged\":true' in _disable_output.stdout" + when: _cluster_pods.stdout_lines | count > 0 + +- command: > + oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + register: _cluster_dcs + +## restart the node if it's dc is in the list of nodes to restart? +- name: "Restart ES node {{ _es_node }}" + include_tasks: restart_es_node.yml + with_items: "{{ _restart_logging_nodes }}" + loop_control: + loop_var: _es_node + when: _es_node in _cluster_dcs.stdout + +## we may need a new first pod to run against -- fetch them all again +- command: > + oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + register: _cluster_pods + +- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster" + command: > + oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }' + register: _enable_output + changed_when: "'\"acknowledged\":true' in _enable_output.stdout" diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml new file mode 100644 index 000000000..b07b232ce --- /dev/null +++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml @@ -0,0 +1,35 @@ +--- +- name: "Rolling out new pod(s) for {{ _es_node }}" + command: > + oc rollout latest {{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} + +- name: "Waiting for {{ _es_node }} to finish scaling up" + oc_obj: + state: list + name: "{{ _es_node }}" + namespace: "{{ openshift_logging_elasticsearch_namespace }}" + kind: dc + register: _dc_output + until: + - _dc_output.results.results[0].status is defined + - _dc_output.results.results[0].status.readyReplicas is defined + - _dc_output.results.results[0].status.readyReplicas > 0 + retries: 60 + delay: 30 + +- name: Gettings name(s) of replica pod(s) + command: > + oc get pods -l deploymentconfig={{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + register: _pods + +- name: "Waiting for ES to be ready for {{ _es_node }}" + shell: > + oc exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- {{ __es_local_curl }} https://localhost:9200/_cat/health | cut -d' ' -f4 + with_items: "{{ _pods.stdout.split(' ') }}" + loop_control: + loop_var: _pod + register: _pod_status + until: _pod_status.stdout in ['green', 'yellow'] + retries: 60 + delay: 5 + changed_when: false diff --git a/roles/openshift_logging_elasticsearch/templates/es.j2 b/roles/openshift_logging_elasticsearch/templates/es.j2 index bf04094a3..cf6ee36bb 100644 --- a/roles/openshift_logging_elasticsearch/templates/es.j2 +++ b/roles/openshift_logging_elasticsearch/templates/es.j2 @@ -17,6 +17,7 @@ spec: logging-infra: "{{logging_component}}" strategy: type: Recreate + triggers: [] template: metadata: name: "{{deploy_name}}" diff --git a/roles/openshift_logging_elasticsearch/vars/main.yml b/roles/openshift_logging_elasticsearch/vars/main.yml index 09e2ee4d0..c8e995146 100644 --- a/roles/openshift_logging_elasticsearch/vars/main.yml +++ b/roles/openshift_logging_elasticsearch/vars/main.yml @@ -5,6 +5,8 @@ __allowed_es_types: ["data-master", "data-client", "master", "client"] __es_log_appenders: ['file', 'console'] __kibana_index_modes: ["unique", "shared_ops"] +__es_local_curl: "curl -s --cacert /etc/elasticsearch/secret/admin-ca --cert /etc/elasticsearch/secret/admin-cert --key /etc/elasticsearch/secret/admin-key" + # TODO: integrate these openshift_master_config_dir: "{{ openshift.common.config_base }}/master" es_node_quorum: "{{ openshift_logging_elasticsearch_replica_count | int/2 + 1 }}" -- cgit v1.2.3