diff options
| author | Jason DeTiberus <jdetiber@redhat.com> | 2015-11-06 11:36:42 -0500 | 
|---|---|---|
| committer | Jason DeTiberus <jdetiber@redhat.com> | 2015-11-06 11:36:42 -0500 | 
| commit | 8b3006399a4fbcacd420fb407c02a7504be3d36b (patch) | |
| tree | 6ae6a9dd2e3f7ec3138f3676f8a1368cf638ad0c | |
| parent | 941246584a95cc4b5d5619e2b31f75923888f2f6 (diff) | |
| parent | e5cafe3da31848e12568a6fbd79dfd2193bb72e7 (diff) | |
Merge remote-tracking branch 'origin/upgradeFix2' into upgradeFix2
| -rw-r--r-- | playbooks/adhoc/upgrades/files/pre-upgrade-check | 185 | ||||
| -rw-r--r-- | playbooks/adhoc/upgrades/upgrade.yml | 61 | ||||
| -rw-r--r-- | roles/openshift_master/tasks/main.yml | 5 | ||||
| -rw-r--r-- | utils/src/ooinstall/cli_installer.py | 4 | 
4 files changed, 251 insertions, 4 deletions
| diff --git a/playbooks/adhoc/upgrades/files/pre-upgrade-check b/playbooks/adhoc/upgrades/files/pre-upgrade-check new file mode 100644 index 000000000..c8ecae399 --- /dev/null +++ b/playbooks/adhoc/upgrades/files/pre-upgrade-check @@ -0,0 +1,185 @@ +#!/usr/bin/env python +""" +Pre-upgrade checks that must be run on a master before proceeding with upgrade. +""" +# This is a script not a python module: +# pylint: disable=invalid-name + +# NOTE: This script should not require any python libs other than what is +# in the standard library. + +__license__ = "ASL 2.0" + +import json +import os +import subprocess +import re + +# The maximum length of container.ports.name +ALLOWED_LENGTH = 15 +# The valid structure of container.ports.name +ALLOWED_CHARS = re.compile('^[a-z0-9][a-z0-9\\-]*[a-z0-9]$') +AT_LEAST_ONE_LETTER = re.compile('[a-z]') +# look at OS_PATH for the full path. Default ot 'oc' +OC_PATH = os.getenv('OC_PATH', 'oc') + + +def validate(value): +    """ +    validate verifies that value matches required conventions + +    Rules of container.ports.name validation: + +    * must be less that 16 chars +    * at least one letter +    * only a-z0-9- +    * hyphens can not be leading or trailing or next to each other + +    :Parameters: +       - `value`: Value to validate +    """ +    if len(value) > ALLOWED_LENGTH: +        return False + +    if '--' in value: +        return False + +    # We search since it can be anywhere +    if not AT_LEAST_ONE_LETTER.search(value): +        return False + +    # We match because it must start at the beginning +    if not ALLOWED_CHARS.match(value): +        return False +    return True + + +def list_items(kind): +    """ +    list_items returns a list of items from the api + +    :Parameters: +       - `kind`: Kind of item to access +    """ +    response = subprocess.check_output([OC_PATH, 'get', '--all-namespaces', '-o', 'json', kind]) +    items = json.loads(response) +    return items.get("items", []) + + +def get(obj, *paths): +    """ +    Gets an object + +    :Parameters: +       - `obj`: A dictionary structure +       - `path`: All other non-keyword arguments +    """ +    ret_obj = obj +    for path in paths: +        if ret_obj.get(path, None) is None: +            return [] +        ret_obj = ret_obj[path] +    return ret_obj + + +# pylint: disable=too-many-arguments +def pretty_print_errors(namespace, kind, item_name, container_name, port_name, valid): +    """ +    Prints out results in human friendly way. + +    :Parameters: +       - `namespace`: Namespace of the resource +       - `kind`: Kind of the resource +       - `item_name`: Name of the resource +       - `container_name`: Name of the container. May be "" when kind=Service. +       - `port_name`: Name of the port +       - `valid`: True if the port is valid +    """ +    if not valid: +        if len(container_name) > 0: +            print('%s/%s -n %s (Container="%s" Port="%s")' % ( +                kind, item_name, namespace, container_name, port_name)) +        else: +            print('%s/%s -n %s (Port="%s")' % ( +                kind, item_name, namespace, port_name)) + + +def print_validation_header(): +    """ +    Prints the error header. Should run on the first error to avoid +    overwhelming the user. +    """ +    print """\ +At least one port name does not validate. Valid port names: + +    * must be less that 16 chars +    * have at least one letter +    * only a-z0-9- +    * do not start or end with - +    * Dashes may not be next to eachother ('--') +""" + + +def main(): +    """ +    main is the main entry point to this script +    """ +    try: +        # the comma at the end suppresses the newline +        print "Checking for oc ...", +        subprocess.check_output([OC_PATH, 'whoami']) +        print "found" +    except: +        print( +            'Can not find oc (%s). Override the path with the ' +            'OC_PATH environment variable. Exiting...' % OC_PATH) +        raise SystemExit(1) + +    # Where the magic happens +    first_error = True +    for kind, path in [ +            ('replicationcontrollers', ("spec", "template", "spec", "containers")), +            ('pods', ("spec", "containers")), +            ('deploymentconfigs', ("spec", "template", "spec", "containers"))]: +        for item in list_items(kind): +            namespace = item["metadata"]["namespace"] +            item_name = item["metadata"]["name"] +            for container in get(item, *path): +                container_name = container["name"] +                for port in get(container, "ports"): +                    port_name = port.get("name", None) +                    if not port_name: +                        # Unnamed ports are OK +                        continue +                    valid = validate(port_name) +                    if not valid and first_error: +                        first_error = False +                        print_validation_header() +                    pretty_print_errors( +                        namespace, kind, item_name, +                        container_name, port_name, valid) + +    # Services follow a different flow +    for item in list_items('services'): +        namespace = item["metadata"]["namespace"] +        item_name = item["metadata"]["name"] +        for port in get(item, "spec", "ports"): +            port_name = port.get("targetPort", None) +            if isinstance(port_name, int) or port_name is None: +                # Integer only or unnamed ports are OK +                continue +            valid = validate(port_name) +            if not valid and first_error: +                first_error = False +                print_validation_header() +            pretty_print_errors( +                namespace, "services", item_name, "", port_name, valid) + +    # If we had at least 1 error then exit with 1 +    if not first_error: +        raise SystemExit(1) + + +if __name__ == '__main__': +    main() + diff --git a/playbooks/adhoc/upgrades/upgrade.yml b/playbooks/adhoc/upgrades/upgrade.yml index e63add4d1..dab0195ac 100644 --- a/playbooks/adhoc/upgrades/upgrade.yml +++ b/playbooks/adhoc/upgrades/upgrade.yml @@ -6,12 +6,24 @@  - name: Verify upgrade can proceed    hosts: masters[0] +  vars: +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"    gather_facts: no    tasks:      # Checking the global deployment type rather than host facts, this is about      # what the user is requesting.      - fail: msg="Deployment type enterprise not supported for upgrade"        when: deployment_type == "enterprise" +    # Pacemaker is currently the only supported upgrade path for multiple masters +    - fail: msg="openshift_master_cluster_method must be set to 'pacemaker'" +      when: openshift_master_ha | bool and ((openshift_master_cluster_method is not defined) or (openshift_master_cluster_method is defined and openshift_master_cluster_method != "pacemaker")) + +- name: Run pre-upgrade checks on first master +  hosts: masters[0] +  tasks: +  # If this script errors out ansible will show the default stdout/stderr +  # which contains details for the user: +  - script: files/pre-upgrade-check  - name: Evaluate etcd_hosts    hosts: localhost @@ -182,8 +194,6 @@      command: >        tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz          -C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} . -    args: -      creates: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz"      with_items: masters_needing_certs    - name: Retrieve the master cert tarball from the master @@ -195,11 +205,11 @@        validate_checksum: yes      with_items: masters_needing_certs -  - name: Sync certs and restart masters post configuration change    hosts: masters    vars:      sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}" +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"    tasks:    - name: Unarchive the tarball on the master      unarchive: @@ -209,7 +219,41 @@    - name: Restart master services      service: name="{{ openshift.common.service_type}}-master" state=restarted +    when: not openshift_master_ha | bool + +- name: Destroy cluster +  hosts: masters[0] +  vars: +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}" +    openshift_deployment_type: "{{ deployment_type }}" +  pre_tasks: +  - name: Check for configured cluster +    stat: +      path: /etc/corosync/corosync.conf +    register: corosync_conf +    when: openshift_master_ha | bool +  - name: Destroy cluster +    command: pcs cluster destroy --all +    when: openshift_master_ha | bool and corosync_conf.stat.exists == true + +- name: Start pcsd on masters +  hosts: masters +  vars: +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}" +  tasks: +  - name: Start pcsd +    service: name=pcsd state=started +    when: openshift_master_ha | bool +- name: Re-create cluster +  hosts: masters[0] +  vars: +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}" +    openshift_deployment_type: "{{ deployment_type }}" +    omc_cluster_hosts: "{{ groups.masters | join(' ') }}" +  roles: +  - role: openshift_master_cluster +    when: openshift_master_ha | bool  - name: Delete temporary directory on localhost    hosts: localhost @@ -255,10 +299,21 @@  - name: Restart masters post reconcile    hosts: masters +  vars: +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"    tasks:      - name: Restart master services        service: name="{{ openshift.common.service_type}}-master" state=restarted +      when: not openshift_master_ha | bool +- name: Restart cluster post reconcile +  hosts: masters[0] +  vars: +    openshift_master_ha: "{{ groups['masters'] | length > 1 }}" +  tasks: +    - name: Restart master cluster +      command: pcs resource restart master +      when: openshift_master_ha | bool  - name: Upgrade default router and registry    hosts: masters[0] diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index be77fce4a..35570923c 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -140,22 +140,27 @@      src: atomic-openshift-master-api.service.j2      dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-api.service      force: no +  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"  - name: Create the controllers service file    template:      src: atomic-openshift-master-controllers.service.j2      dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-controllers.service      force: no +  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"  - name: Create the api env file    template:      src: atomic-openshift-master-api.j2      dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api      force: no +  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"  - name: Create the controllers env file    template:      src: atomic-openshift-master-controllers.j2      dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers      force: no +  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"  - command: systemctl daemon-reload +  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"  # end workaround for missing systemd unit files  - name: Create session secrets file diff --git a/utils/src/ooinstall/cli_installer.py b/utils/src/ooinstall/cli_installer.py index 9f0861b77..e63f14816 100644 --- a/utils/src/ooinstall/cli_installer.py +++ b/utils/src/ooinstall/cli_installer.py @@ -323,6 +323,8 @@ def get_installed_hosts(hosts, callback_facts):              installed_hosts.append(host)      return installed_hosts +# pylint: disable=too-many-branches +# This pylint error will be corrected shortly in separate PR.  def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose):      # Copy the list of existing hosts so we can remove any already installed nodes. @@ -383,7 +385,7 @@ def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose):                      openshift_ansible.set_config(oo_cfg)                      click.echo('Gathering information from hosts...') -                    callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts) +                    callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts, verbose)                      if error:                          click.echo("There was a problem fetching the required information. " \                                     "See {} for details.".format(oo_cfg.settings['ansible_log_path'])) | 
