playbooks/adhoc/upgrades/upgrade.yml


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407

---
# Apply the openshift_facts role to every host in the masters group so that
# later plays can read the `openshift.*` variables on those hosts.
- name: Load master facts
  hosts: masters
  roles:
    - role: openshift_facts

# Guard play: stop before touching anything when the inventory has several
# masters but is not configured for the pacemaker cluster method.
- name: Verify upgrade can proceed
  hosts: masters[0]
  gather_facts: false
  vars:
    # Evaluates truthy when the masters group holds more than one host.
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  tasks:
    # Pacemaker is currently the only supported upgrade path for multiple
    # masters. An unset openshift_master_cluster_method is treated the same
    # as any non-pacemaker value.
    - fail:
        msg: "openshift_master_cluster_method must be set to 'pacemaker'"
      when:
        - openshift_master_ha | bool
        - openshift_master_cluster_method | default('') != "pacemaker"

# Executes the bundled files/pre-upgrade-check script on the first master.
- name: Run pre-upgrade checks on first master
  hosts: masters[0]
  tasks:
    # When the script exits non-zero, Ansible prints its stdout/stderr as part
    # of the default failure output; that output carries the details the user
    # needs.
    - script: files/pre-upgrade-check

# Builds the in-memory `etcd_hosts` group that the backup play targets.
# With embedded etcd the group contains only the first master; otherwise it
# contains every host of the inventory's `etcd` group.
- name: Evaluate etcd_hosts
  hosts: localhost
  tasks:
  - name: Evaluate etcd hosts
    add_host:
      name: "{{ groups.masters.0 }}"
      groups: etcd_hosts
    when: hostvars[groups.masters.0].openshift.master.embedded_etcd | bool
  - name: Evaluate etcd hosts
    add_host:
      name: "{{ item }}"
      groups: etcd_hosts
    # Full Jinja expression instead of a bare variable name (bare names in
    # with_* loops are deprecated). default([]) keeps the loop well-defined
    # when the inventory has no `etcd` group at all.
    with_items: "{{ groups.etcd | default([]) }}"
    when: not hostvars[groups.masters.0].openshift.master.embedded_etcd | bool

# Creates an etcd backup under the OpenShift data dir on every host of the
# `etcd_hosts` group, after checking (for embedded etcd) that enough disk
# space is available.
- name: Backup etcd
  hosts: etcd_hosts
  vars:
    embedded_etcd: "{{ openshift.master.embedded_etcd }}"
    # This is a lookup-backed play var, so it is re-evaluated on every
    # reference. Tasks must not use it directly more than once; they use
    # `etcd_backup_dir` (set below) instead.
    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
  roles:
  - openshift_facts
  tasks:

  - stat:
      path: /var/lib/openshift
    register: var_lib_openshift

  - stat:
      path: /var/lib/origin
    register: var_lib_origin

  # Only link when the old directory exists and the new path is still free.
  - name: Create origin symlink if necessary
    file:
      src: /var/lib/openshift/
      dest: /var/lib/origin
      state: link
    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False

  - name: Check available disk space for etcd backup
    # We assume to be using the data dir for all backups.
    shell: >
      df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
    register: avail_disk

  - name: Check current embedded etcd disk usage
    shell: >
      du -k {{ openshift.master.etcd_data_dir }} | tail -n 1 | cut -f1
    register: etcd_disk_usage
    when: embedded_etcd | bool

  # etcd_disk_usage only has stdout when the previous task ran, hence the
  # embedded_etcd guard comes first in the condition.
  - name: Abort if insufficient disk space for etcd backup
    fail:
      msg: >
        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
        {{ avail_disk.stdout }} Kb available.
    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)

  - name: Install etcd (for etcdctl)
    yum:
      pkg: etcd
      state: latest

  # Freeze the backup path once. Referencing `timestamp` separately in the
  # backup command and in the debug message could yield two different
  # seconds, making the reported location differ from the real one.
  - name: Resolve etcd backup directory
    set_fact:
      etcd_backup_dir: "{{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"

  - name: Generate etcd backup
    command: >
      etcdctl backup --data-dir={{ openshift.master.etcd_data_dir }}
      --backup-dir={{ etcd_backup_dir }}

  - name: Display location of etcd backup
    debug:
      msg: "Etcd backup created in {{ etcd_backup_dir }}"

# Passes the requested deployment_type to the openshift_facts module as a
# local fact of the `common` role, on every host in the OSEv3 group.
- name: Update deployment type
  hosts: OSEv3
  roles:
    - role: openshift_facts
  post_tasks:
    - openshift_facts:
        local_facts:
          deployment_type: "{{ deployment_type }}"
        role: common


# Determines the installed and available package versions on the first
# master and aborts when the requested upgrade is not supported.
# Facts set here (g_aos_versions, g_new_version) live on masters[0] only.
- name: Perform upgrade version checking
  hosts: masters[0]
  tasks:
  - name: Clean yum cache
    command: yum clean all

  - name: Determine available versions
    script: files/versions.sh {{ openshift.common.service_type }} openshift
    register: g_versions_result

  # The script output is parsed as YAML; the tasks below read its
  # curr_version and avail_version keys.
  - set_fact:
      g_aos_versions: "{{ g_versions_result.stdout | from_yaml }}"

  # Target version without the release suffix (text before the first '-').
  # Falls back to the current version when no newer package is available.
  - set_fact:
      g_new_version: "{{ g_aos_versions.curr_version.split('-', 1).0 if g_aos_versions.avail_version is none else g_aos_versions.avail_version.split('-', 1).0 }}"

  - fail:
      msg: This playbook requires Origin 1.0.6 or later
    when: deployment_type == 'origin' and g_aos_versions.curr_version | version_compare('1.0.6','<')

  # TODO: This should be specific to the 3.1 upgrade playbook (coming in future refactor), otherwise we are blocking 3.0.1 to 3.0.2 here.
  # The deployment types checked here are the same pair used by the other
  # plays in this playbook ('openshift-enterprise', 'atomic-enterprise').
  - fail:
      msg: Atomic OpenShift 3.1 packages not found
    when: deployment_type in ['openshift-enterprise', 'atomic-enterprise'] and g_aos_versions.curr_version | version_compare('3.0.2.900','<') and (g_aos_versions.avail_version is none or g_aos_versions.avail_version | version_compare('3.0.2.900','<'))
  # Deployment type 'enterprise' is no longer valid if we're upgrading to 3.1 or beyond.
  # (still valid for 3.0.x to 3.0.y however) Using the global deployment_type here as
  # we're checking what was requested by the upgrade, not the current type on the system.
  - fail:
      msg: "Deployment type enterprise not supported for upgrade"
    when: deployment_type == "enterprise" and  g_aos_versions.curr_version | version_compare('3.1', '>=')


# Updates the kernel and master packages on every master, migrates the master
# configuration, and sets the facts the certificate plays rely on.
- name: Upgrade masters
  hosts: masters
  vars:
    # Optional package version suffix; empty when openshift_pkg_version is unset.
    openshift_version: "{{ openshift_pkg_version | default('') }}"
  tasks:
    - name: Upgrade to latest available kernel
      yum:
        pkg: kernel
        state: latest

    - name: Upgrade master packages
      command: yum update -y {{ openshift.common.service_type }}-master{{ openshift_version }}

    - name: Ensure python-yaml present for config upgrade
      yum:
        pkg: PyYAML
        state: installed

    # g_aos_versions is set with set_fact on masters[0] only, so it must be
    # read through hostvars; a bare reference is undefined on the other
    # masters.
    # NOTE(review): curr_version was captured before the package update
    # above - confirm it is the intended value to compare against 3.1.
    - name: Upgrade master configuration
      openshift_upgrade_config:
        from_version: '3.0'
        to_version: '3.1'
        role: master
        config_base: "{{ hostvars[inventory_hostname].openshift.common.config_base }}"
      when: deployment_type in ['openshift-enterprise', 'atomic-enterprise'] and hostvars[groups.masters.0].g_aos_versions.curr_version | version_compare('3.1', '>=')

    # Per-host facts read later when certificates are generated and synced.
    - set_fact:
        master_certs_missing: True
        master_cert_subdir: master-{{ openshift.common.hostname }}
        master_cert_config_dir: "{{ openshift.common.config_base }}/master"

# Creates a scratch directory on the control host; its path is registered as
# g_master_mktemp and read by later plays via hostvars.localhost.
- name: Create temp directory for syncing certs
  hosts: localhost
  gather_facts: false
  tasks:
    - name: Create local temp directory for syncing certs
      local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX
      # Never reported as a change.
      changed_when: false
      register: g_master_mktemp

# Runs the openshift_master_certificates role on the first master, then
# packages the generated per-master certificates and fetches the tarballs
# into the temp directory on the control host.
- name: Generate missing master certificates
  hosts: masters[0]
  vars:
    master_hostnames: "{{ hostvars
                          | oo_select_keys(groups.masters)
                          | oo_collect('openshift.common.all_hostnames')
                          | oo_flatten | unique }}"
    master_generated_certs_dir: "{{ openshift.common.config_base }}/generated-configs"
    # Every master except the first one. The first master is removed from the
    # list of host names *before* selecting hostvars: applying difference()
    # to the selected hostvars entries would compare them against a host
    # name and therefore never remove anything.
    masters_needing_certs: "{{ hostvars
                               | oo_select_keys(groups.masters | difference([groups.masters.0])) }}"
    sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
    openshift_deployment_type: "{{ deployment_type }}"
  roles:
  - openshift_master_certificates
  post_tasks:
  # NOTE(review): no play visible in this playbook creates the
  # oo_etcd_to_config group - confirm this condition can ever be true here.
  - name: Remove generated etcd client certs when using external etcd
    file:
      path: "{{ master_generated_certs_dir }}/{{ item.0.master_cert_subdir }}/{{ item.1 }}"
      state: absent
    when: groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config
    with_nested:
    - "{{ masters_needing_certs }}"
    - - master.etcd-client.crt
      - master.etcd-client.key

  - name: Create a tarball of the master certs
    command: >
      tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz
      -C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} .
    with_items: "{{ masters_needing_certs }}"

  - name: Retrieve the master cert tarball from the master
    fetch:
      src: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz"
      dest: "{{ sync_tmpdir }}/"
      flat: yes
      fail_on_missing: yes
      validate_checksum: yes
    with_items: "{{ masters_needing_certs }}"

# Unpacks the fetched certificate tarball on every master except the first,
# then restarts the master service when the inventory has a single master.
- name: Sync certs and restart masters post configuration change
  hosts: masters
  vars:
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
    sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
  tasks:
    - name: Unarchive the tarball on the master
      when: inventory_hostname != groups.masters.0
      unarchive:
        dest: "{{ master_cert_config_dir }}"
        src: "{{ sync_tmpdir }}/{{ master_cert_subdir }}.tgz"

    # Skipped for multi-master inventories.
    - name: Restart master services
      when: not openshift_master_ha | bool
      service:
        name: "{{ openshift.common.service_type }}-master"
        state: restarted

# For multi-master inventories: tears down an existing pcs cluster, detected
# by the presence of /etc/corosync/corosync.conf on the first master.
- name: Destroy cluster
  hosts: masters[0]
  vars:
    openshift_deployment_type: "{{ deployment_type }}"
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  pre_tasks:
    - name: Check for configured cluster
      when: openshift_master_ha | bool
      register: corosync_conf
      stat:
        path: /etc/corosync/corosync.conf

    # The HA test comes first: when the stat task above was skipped,
    # corosync_conf carries no `stat` result to inspect.
    - name: Destroy cluster
      when: openshift_master_ha | bool and corosync_conf.stat.exists == true
      command: pcs cluster destroy --all

# For multi-master inventories: makes sure pcsd is enabled and running on
# every master.
- name: Start pcsd on masters
  hosts: masters
  vars:
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  tasks:
    - name: Start pcsd
      when: openshift_master_ha | bool
      service:
        name: pcsd
        state: started
        enabled: yes

# For multi-master inventories: applies the openshift_master_cluster role on
# the first master, passing it the space-separated list of all masters.
- name: Re-create cluster
  hosts: masters[0]
  vars:
    omc_cluster_hosts: "{{ groups.masters | join(' ') }}"
    openshift_deployment_type: "{{ deployment_type }}"
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  roles:
    - role: openshift_master_cluster
      when: openshift_master_ha | bool

# Removes the scratch directory whose path was registered as g_master_mktemp
# on localhost.
- name: Delete temporary directory on localhost
  hosts: localhost
  gather_facts: false
  tasks:
    # Never reported as a change.
    - changed_when: false
      file:
        name: "{{ g_master_mktemp.stdout }}"
        state: absent


# Updates the node package on every host in the nodes group and restarts the
# node service afterwards.
- name: Upgrade nodes
  hosts: nodes
  vars:
    # Optional package version suffix; empty when openshift_pkg_version is unset.
    openshift_version: "{{ openshift_pkg_version | default('') }}"
  roles:
    - role: openshift_facts
  tasks:
    - name: Upgrade node packages
      command: yum update -y {{ openshift.common.service_type }}-node{{ openshift_version }}

    - name: Restart node services
      service:
        name: "{{ openshift.common.service_type }}-node"
        state: restarted

# Reconciles cluster roles on the first master, and cluster role bindings as
# well when the target version is new enough for the deployment type.
- name: Update cluster policy and policy bindings
  hosts: masters[0]
  vars:
    # Enterprise-style deployments: target version above 3.0.2.
    ent_reconcile_bindings: "{{ deployment_type in ['openshift-enterprise', 'atomic-enterprise'] and g_new_version | version_compare('3.0.2','>') }}"
    # Origin deployments: target version above 1.0.6.
    origin_reconcile_bindings: "{{ deployment_type == 'origin' and g_new_version | version_compare('1.0.6', '>') }}"
  tasks:
    - name: oadm policy reconcile-cluster-roles --confirm
      command: >
        {{ openshift.common.admin_binary }}
        --config={{ openshift.common.config_base }}/master/admin.kubeconfig
        policy reconcile-cluster-roles --confirm

    # Runs when either of the two version gates above holds.
    - name: oadm policy reconcile-cluster-role-bindings --confirm
      when: origin_reconcile_bindings | bool or ent_reconcile_bindings | bool
      command: >
        {{ openshift.common.admin_binary }}
        --config={{ openshift.common.config_base }}/master/admin.kubeconfig
        policy reconcile-cluster-role-bindings
        --exclude-groups=system:authenticated
        --exclude-groups=system:unauthenticated
        --exclude-users=system:anonymous
        --additive-only=true --confirm


# Single-master inventories only: restart the master service after the
# policy reconcile.
- name: Restart masters post reconcile
  hosts: masters
  vars:
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  tasks:
    - name: Restart master services
      when: not openshift_master_ha | bool
      service:
        name: "{{ openshift.common.service_type }}-master"
        state: restarted

# Multi-master inventories only: restart the `master` pcs resource from the
# first master and wait for port 8443 on the cluster VIP to accept
# connections.
- name: Restart cluster post reconcile
  hosts: masters[0]
  vars:
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  tasks:
    - name: Restart master cluster
      when: openshift_master_ha | bool
      command: pcs resource restart master

    - name: Wait for the clustered master service to be available
      when: openshift_master_ha | bool
      wait_for:
        host: "{{ openshift_master_cluster_vip }}"
        port: 8443
        state: started
        # Waits 90 seconds before the first check; gives up after 180 seconds.
        delay: 90
        timeout: 180

# Patches the default router and docker-registry deployment configs (when
# they exist in the `default` namespace) to the new image version.
- name: Upgrade default router and registry
  hosts: masters[0]
  # Play vars are declared as a mapping (the list-of-dicts form is a legacy
  # syntax).
  vars:
    # Image names derived from the master's registry_url template by
    # substituting ${component} and ${version}.
    registry_image: "{{  openshift.master.registry_url | replace( '${component}', 'docker-registry' )  | replace ( '${version}', 'v' + g_new_version  ) }}"
    router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', 'v' + g_new_version ) }}"
    oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig"
  tasks:
    # A non-zero rc only means "no default router"; it must not fail the play.
    - name: Check for default router
      command: >
        {{ oc_cmd }} get -n default dc/router
      register: _default_router
      failed_when: false
      changed_when: false
    - name: Check for allowHostNetwork and allowHostPorts
      when: _default_router.rc == 0
      shell: >
        {{ oc_cmd }} get -o yaml scc/privileged | /usr/bin/grep -e allowHostPorts -e allowHostNetwork
      register: _scc
    - name: Grant allowHostNetwork and allowHostPorts
      when:
        - _default_router.rc == 0
        - "'false' in _scc.stdout"
      command: >
        {{ oc_cmd }} patch scc/privileged -p '{"allowHostPorts":true,"allowHostNetwork":true}' --loglevel=9
    # The dc patches below name the namespace explicitly so they target the
    # same object that the existence check above looked up.
    # NOTE(review): the serviceAccount fields are nested under
    # spec.strategy.spec rather than spec.template.spec - confirm intended.
    - name: Update deployment config to 1.0.4/3.0.1 spec
      when: _default_router.rc == 0
      command: >
        {{ oc_cmd }} patch -n default dc/router -p
        '{"spec":{"strategy":{"rollingParams":{"updatePercent":-10},"spec":{"serviceAccount":"router","serviceAccountName":"router"}}}}'
    - name: Switch to hostNetwork=true
      when: _default_router.rc == 0
      command: >
        {{ oc_cmd }} patch -n default dc/router -p '{"spec":{"template":{"spec":{"hostNetwork":true}}}}'
    - name: Update router image to current version
      when: _default_router.rc == 0
      command: >
        {{ oc_cmd }} patch -n default dc/router -p
        '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}"}]}}}}'

    # Same pattern as the router: probe first, patch only when present.
    - name: Check for default registry
      command: >
          {{ oc_cmd }} get -n default dc/docker-registry
      register: _default_registry
      failed_when: false
      changed_when: false
    - name: Update registry image to current version
      when: _default_registry.rc == 0
      command: >
        {{ oc_cmd }} patch -n default dc/docker-registry -p
        '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}'

# Applies the openshift_examples role on the first master with its import
# command set to "update".
- name: Update image streams and templates
  hosts: masters[0]
  vars:
    openshift_deployment_type: "{{ deployment_type }}"
    openshift_examples_import_command: "update"
  roles:
    - role: openshift_examples

# Single-master inventories only: make sure the master service is running
# and enabled.
- name: Ensure master services enabled
  hosts: masters
  vars:
    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
  tasks:
    - name: Enable master services
      when: not openshift_master_ha | bool
      service:
        name: "{{ openshift.common.service_type }}-master"
        enabled: yes
        state: started

# Makes sure the node service is running and enabled on every node.
- name: Ensure node services enabled
  hosts: nodes
  tasks:
    # Named after what the task does: state=started does not restart a
    # service that is already running.
    - name: Enable node services
      service:
        name: "{{ openshift.common.service_type }}-node"
        state: started
        enabled: yes