blob: 5024afc04782625c4afde82700085f101fd7a5dc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
---
# tasks file for ansible-role-cuda
# Load one OS-specific variables file for this host. The candidates are tried
# from most to least specific (distribution + full version, distribution +
# major version, distribution only, OS family) and the first one found is
# included.
- name: "Gather OS specific variables"
  include_vars:
    file: "{{ item }}"
  with_first_found:
    - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version }}.yml"
    - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version }}.yml"
    - "{{ ansible_distribution | lower }}.yml"
    - "{{ ansible_os_family | lower }}.yml"
# Install CUDA and bring the GPUs up. The whole block is skipped unless `gpu`
# is truthy.
#
# NOTE(review): the indentation of the copy under review was lost. The nesting
# below (every task inside the block, `when: gpu` on the block itself) is
# reconstructed on the basis that this condition is the only directive that
# would justify the block wrapper -- confirm against the repository.
#
# `| bool` is used instead of `== True` so that string values (for example
# from `-e gpu=true` or an INI inventory) are honoured rather than silently
# compared unequal to the literal boolean.
- name: Install and initialise CUDA on GPU nodes
  when: gpu | bool
  block:
    - name: Include yum/dnf specific configuration tasks
      include_tasks: configure_yum.yml
      when: ansible_pkg_mgr in ['yum', 'dnf']

    - name: Include apt specific configuration tasks
      include_tasks: configure_apt.yml
      when: ansible_pkg_mgr == 'apt'

    # NOTE(review): `kernel-devel` looks like a RedHat-family package name,
    # but this task is not limited to yum/dnf hosts -- confirm how apt hosts
    # are expected to get past it.
    - name: Install kernel development files
      package:
        name: kernel-devel
        state: present
      register: result

    # Runs only when kernel-devel was installed just now: the kernel package
    # is then brought to the latest version as well.
    - name: Synchronize kernel and kernel-devel packages
      package:
        name: kernel
        state: latest
      when: result is changed

    # The per-item loop and the registered variable are kept unchanged;
    # presumably the notified handlers (defined outside this file) or other
    # consumers read `cuda_packages_installation` -- verify before
    # collapsing the loop into a single `name: "{{ cuda_packages }}"` call.
    - name: Install CUDA and related packages (1.5-2GB download, also restarts if cuda_restart_node_on_install is set to True)
      package:
        name: "{{ item }}"
        state: present
      with_items: "{{ cuda_packages }}"
      register: cuda_packages_installation
      notify:
        - ZZ CUDA Restart server
        - ZZ CUDA Wait for server to restart

    # The mode is quoted so it always reaches the module as the octal string
    # "0755" and is never re-interpreted as a number by the YAML parser.
    - name: Template CUDA paths to user environments
      template:
        src: cuda.sh.j2
        dest: /etc/profile.d/cuda.sh
        mode: "0755"
      when: cuda_bash_profile

    - name: Include CUDA initialisation tasks
      include_tasks: cuda_init.yml
      when: cuda_init | bool

    # Handlers are flushed here because, if the same playbook later tries to
    # start slurmd before the cuda_init.sh script has run, slurmd does not
    # start and the play fails.
    # TODO: reload the nvidia modules etc. instead of restarting the node.
    - name: flush the handlers - so that the node is rebooted after CUDA is installed and that the GPUs are initialized before we start slurm
      meta: flush_handlers
# vim:ft=ansible:
|