summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason DeTiberus <jdetiber@redhat.com>2015-11-06 11:36:42 -0500
committerJason DeTiberus <jdetiber@redhat.com>2015-11-06 11:36:42 -0500
commit8b3006399a4fbcacd420fb407c02a7504be3d36b (patch)
tree6ae6a9dd2e3f7ec3138f3676f8a1368cf638ad0c
parent941246584a95cc4b5d5619e2b31f75923888f2f6 (diff)
parente5cafe3da31848e12568a6fbd79dfd2193bb72e7 (diff)
downloadopenshift-8b3006399a4fbcacd420fb407c02a7504be3d36b.tar.gz
openshift-8b3006399a4fbcacd420fb407c02a7504be3d36b.tar.bz2
openshift-8b3006399a4fbcacd420fb407c02a7504be3d36b.tar.xz
openshift-8b3006399a4fbcacd420fb407c02a7504be3d36b.zip
Merge remote-tracking branch 'origin/upgradeFix2' into upgradeFix2
-rw-r--r--playbooks/adhoc/upgrades/files/pre-upgrade-check185
-rw-r--r--playbooks/adhoc/upgrades/upgrade.yml61
-rw-r--r--roles/openshift_master/tasks/main.yml5
-rw-r--r--utils/src/ooinstall/cli_installer.py4
4 files changed, 251 insertions, 4 deletions
diff --git a/playbooks/adhoc/upgrades/files/pre-upgrade-check b/playbooks/adhoc/upgrades/files/pre-upgrade-check
new file mode 100644
index 000000000..c8ecae399
--- /dev/null
+++ b/playbooks/adhoc/upgrades/files/pre-upgrade-check
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+"""
+Pre-upgrade checks that must be run on a master before proceeding with upgrade.
+"""
+# This is a script not a python module:
+# pylint: disable=invalid-name
+
+# NOTE: This script should not require any python libs other than what is
+# in the standard library.
+
+__license__ = "ASL 2.0"
+
+import json
+import os
+import subprocess
+import re
+
+# The maximum length of container.ports.name
+ALLOWED_LENGTH = 15
+# The valid structure of container.ports.name
+ALLOWED_CHARS = re.compile('^[a-z0-9][a-z0-9\\-]*[a-z0-9]$')
+AT_LEAST_ONE_LETTER = re.compile('[a-z]')
+# look at OS_PATH for the full path. Default ot 'oc'
+OC_PATH = os.getenv('OC_PATH', 'oc')
+
+
+def validate(value):
+ """
+ validate verifies that value matches required conventions
+
+ Rules of container.ports.name validation:
+
+ * must be less that 16 chars
+ * at least one letter
+ * only a-z0-9-
+ * hyphens can not be leading or trailing or next to each other
+
+ :Parameters:
+ - `value`: Value to validate
+ """
+ if len(value) > ALLOWED_LENGTH:
+ return False
+
+ if '--' in value:
+ return False
+
+ # We search since it can be anywhere
+ if not AT_LEAST_ONE_LETTER.search(value):
+ return False
+
+ # We match because it must start at the beginning
+ if not ALLOWED_CHARS.match(value):
+ return False
+ return True
+
+
+def list_items(kind):
+ """
+ list_items returns a list of items from the api
+
+ :Parameters:
+ - `kind`: Kind of item to access
+ """
+ response = subprocess.check_output([OC_PATH, 'get', '--all-namespaces', '-o', 'json', kind])
+ items = json.loads(response)
+ return items.get("items", [])
+
+
+def get(obj, *paths):
+ """
+ Gets an object
+
+ :Parameters:
+ - `obj`: A dictionary structure
+ - `path`: All other non-keyword arguments
+ """
+ ret_obj = obj
+ for path in paths:
+ if ret_obj.get(path, None) is None:
+ return []
+ ret_obj = ret_obj[path]
+ return ret_obj
+
+
+# pylint: disable=too-many-arguments
+def pretty_print_errors(namespace, kind, item_name, container_name, port_name, valid):
+ """
+ Prints out results in human friendly way.
+
+ :Parameters:
+ - `namespace`: Namespace of the resource
+ - `kind`: Kind of the resource
+ - `item_name`: Name of the resource
+ - `container_name`: Name of the container. May be "" when kind=Service.
+ - `port_name`: Name of the port
+ - `valid`: True if the port is valid
+ """
+ if not valid:
+ if len(container_name) > 0:
+ print('%s/%s -n %s (Container="%s" Port="%s")' % (
+ kind, item_name, namespace, container_name, port_name))
+ else:
+ print('%s/%s -n %s (Port="%s")' % (
+ kind, item_name, namespace, port_name))
+
+
+def print_validation_header():
+ """
+ Prints the error header. Should run on the first error to avoid
+ overwhelming the user.
+ """
+ print """\
+At least one port name does not validate. Valid port names:
+
+ * must be less that 16 chars
+ * have at least one letter
+ * only a-z0-9-
+ * do not start or end with -
+ * Dashes may not be next to eachother ('--')
+"""
+
+
+def main():
+ """
+ main is the main entry point to this script
+ """
+ try:
+ # the comma at the end suppresses the newline
+ print "Checking for oc ...",
+ subprocess.check_output([OC_PATH, 'whoami'])
+ print "found"
+ except:
+ print(
+ 'Can not find oc (%s). Override the path with the '
+ 'OC_PATH environment variable. Exiting...' % OC_PATH)
+ raise SystemExit(1)
+
+ # Where the magic happens
+ first_error = True
+ for kind, path in [
+ ('replicationcontrollers', ("spec", "template", "spec", "containers")),
+ ('pods', ("spec", "containers")),
+ ('deploymentconfigs', ("spec", "template", "spec", "containers"))]:
+ for item in list_items(kind):
+ namespace = item["metadata"]["namespace"]
+ item_name = item["metadata"]["name"]
+ for container in get(item, *path):
+ container_name = container["name"]
+ for port in get(container, "ports"):
+ port_name = port.get("name", None)
+ if not port_name:
+ # Unnamed ports are OK
+ continue
+ valid = validate(port_name)
+ if not valid and first_error:
+ first_error = False
+ print_validation_header()
+ pretty_print_errors(
+ namespace, kind, item_name,
+ container_name, port_name, valid)
+
+ # Services follow a different flow
+ for item in list_items('services'):
+ namespace = item["metadata"]["namespace"]
+ item_name = item["metadata"]["name"]
+ for port in get(item, "spec", "ports"):
+ port_name = port.get("targetPort", None)
+ if isinstance(port_name, int) or port_name is None:
+ # Integer only or unnamed ports are OK
+ continue
+ valid = validate(port_name)
+ if not valid and first_error:
+ first_error = False
+ print_validation_header()
+ pretty_print_errors(
+ namespace, "services", item_name, "", port_name, valid)
+
+ # If we had at least 1 error then exit with 1
+ if not first_error:
+ raise SystemExit(1)
+
+
+if __name__ == '__main__':
+ main()
+
diff --git a/playbooks/adhoc/upgrades/upgrade.yml b/playbooks/adhoc/upgrades/upgrade.yml
index e63add4d1..dab0195ac 100644
--- a/playbooks/adhoc/upgrades/upgrade.yml
+++ b/playbooks/adhoc/upgrades/upgrade.yml
@@ -6,12 +6,24 @@
- name: Verify upgrade can proceed
hosts: masters[0]
+ vars:
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
gather_facts: no
tasks:
# Checking the global deployment type rather than host facts, this is about
# what the user is requesting.
- fail: msg="Deployment type enterprise not supported for upgrade"
when: deployment_type == "enterprise"
+ # Pacemaker is currently the only supported upgrade path for multiple masters
+ - fail: msg="openshift_master_cluster_method must be set to 'pacemaker'"
+ when: openshift_master_ha | bool and ((openshift_master_cluster_method is not defined) or (openshift_master_cluster_method is defined and openshift_master_cluster_method != "pacemaker"))
+
+- name: Run pre-upgrade checks on first master
+ hosts: masters[0]
+ tasks:
+ # If this script errors out ansible will show the default stdout/stderr
+ # which contains details for the user:
+ - script: files/pre-upgrade-check
- name: Evaluate etcd_hosts
hosts: localhost
@@ -182,8 +194,6 @@
command: >
tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz
-C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} .
- args:
- creates: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz"
with_items: masters_needing_certs
- name: Retrieve the master cert tarball from the master
@@ -195,11 +205,11 @@
validate_checksum: yes
with_items: masters_needing_certs
-
- name: Sync certs and restart masters post configuration change
hosts: masters
vars:
sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
tasks:
- name: Unarchive the tarball on the master
unarchive:
@@ -209,7 +219,41 @@
- name: Restart master services
service: name="{{ openshift.common.service_type}}-master" state=restarted
+ when: not openshift_master_ha | bool
+
+- name: Destroy cluster
+ hosts: masters[0]
+ vars:
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+ openshift_deployment_type: "{{ deployment_type }}"
+ pre_tasks:
+ - name: Check for configured cluster
+ stat:
+ path: /etc/corosync/corosync.conf
+ register: corosync_conf
+ when: openshift_master_ha | bool
+ - name: Destroy cluster
+ command: pcs cluster destroy --all
+ when: openshift_master_ha | bool and corosync_conf.stat.exists == true
+
+- name: Start pcsd on masters
+ hosts: masters
+ vars:
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+ tasks:
+ - name: Start pcsd
+ service: name=pcsd state=started
+ when: openshift_master_ha | bool
+- name: Re-create cluster
+ hosts: masters[0]
+ vars:
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+ openshift_deployment_type: "{{ deployment_type }}"
+ omc_cluster_hosts: "{{ groups.masters | join(' ') }}"
+ roles:
+ - role: openshift_master_cluster
+ when: openshift_master_ha | bool
- name: Delete temporary directory on localhost
hosts: localhost
@@ -255,10 +299,21 @@
- name: Restart masters post reconcile
hosts: masters
+ vars:
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
tasks:
- name: Restart master services
service: name="{{ openshift.common.service_type}}-master" state=restarted
+ when: not openshift_master_ha | bool
+- name: Restart cluster post reconcile
+ hosts: masters[0]
+ vars:
+ openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+ tasks:
+ - name: Restart master cluster
+ command: pcs resource restart master
+ when: openshift_master_ha | bool
- name: Upgrade default router and registry
hosts: masters[0]
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index be77fce4a..35570923c 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -140,22 +140,27 @@
src: atomic-openshift-master-api.service.j2
dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-api.service
force: no
+ when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
- name: Create the controllers service file
template:
src: atomic-openshift-master-controllers.service.j2
dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-controllers.service
force: no
+ when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
- name: Create the api env file
template:
src: atomic-openshift-master-api.j2
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api
force: no
+ when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
- name: Create the controllers env file
template:
src: atomic-openshift-master-controllers.j2
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
force: no
+ when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
- command: systemctl daemon-reload
+ when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
# end workaround for missing systemd unit files
- name: Create session secrets file
diff --git a/utils/src/ooinstall/cli_installer.py b/utils/src/ooinstall/cli_installer.py
index 9f0861b77..e63f14816 100644
--- a/utils/src/ooinstall/cli_installer.py
+++ b/utils/src/ooinstall/cli_installer.py
@@ -323,6 +323,8 @@ def get_installed_hosts(hosts, callback_facts):
installed_hosts.append(host)
return installed_hosts
+# pylint: disable=too-many-branches
+# This pylint error will be corrected shortly in separate PR.
def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose):
# Copy the list of existing hosts so we can remove any already installed nodes.
@@ -383,7 +385,7 @@ def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose):
openshift_ansible.set_config(oo_cfg)
click.echo('Gathering information from hosts...')
- callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts)
+ callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts, verbose)
if error:
click.echo("There was a problem fetching the required information. " \
"See {} for details.".format(oo_cfg.settings['ansible_log_path']))