122 files changed, 3270 insertions, 369 deletions
diff --git a/.redhat-ci.inventory b/.papr.inventory index 23bc9923c..23bc9923c 100644 --- a/.redhat-ci.inventory +++ b/.papr.inventory diff --git a/.redhat-ci.sh b/.papr.sh index fce8c1d52..decca625f 100755 --- a/.redhat-ci.sh +++ b/.papr.sh @@ -1,10 +1,12 @@  #!/bin/bash  set -xeuo pipefail +echo "Targeting OpenShift Origin $OPENSHIFT_IMAGE_TAG" +  pip install -r requirements.txt  # ping the nodes to check they're responding and register their ostree versions -ansible -vvv -i .redhat-ci.inventory nodes -a 'rpm-ostree status' +ansible -vvv -i .papr.inventory nodes -a 'rpm-ostree status'  upload_journals() {    mkdir journals @@ -16,7 +18,9 @@ upload_journals() {  trap upload_journals ERR  # run the actual installer -ansible-playbook -vvv -i .redhat-ci.inventory playbooks/byo/config.yml +# FIXME: override openshift_image_tag defined in the inventory until +# https://github.com/openshift/openshift-ansible/issues/4478 is fixed. +ansible-playbook -vvv -i .papr.inventory playbooks/byo/config.yml -e "openshift_image_tag=$OPENSHIFT_IMAGE_TAG"  # run a small subset of origin conformance tests to sanity  # check the cluster NB: we run it on the master since we may diff --git a/.papr.yml b/.papr.yml new file mode 100644 index 000000000..16d6e78b1 --- /dev/null +++ b/.papr.yml @@ -0,0 +1,42 @@ +--- + +# This YAML file is used by PAPR. It details the test +# environment to provision and the test procedure. For more +# information on PAPR, see: +# +#   https://github.com/projectatomic/papr +# +# The PAPR YAML specification detailing allowed fields can +# be found at: +# +#   https://github.com/projectatomic/papr/blob/master/sample.papr.yml + +cluster: +  hosts: +    - name: ocp-master +      distro: fedora/25/atomic +    - name: ocp-node1 +      distro: fedora/25/atomic +    - name: ocp-node2 +      distro: fedora/25/atomic +  container: +    image: fedora:25 + +packages: +  - gcc +  - python-pip +  - python-devel +  - libffi-devel +  - openssl-devel +  - redhat-rpm-config + +context: 'fedora/25/atomic' + +env: +  OPENSHIFT_IMAGE_TAG: v3.6.0-alpha.1 + +tests: +  - ./.papr.sh + +artifacts: +  - journals/ diff --git a/.redhat-ci.yml b/.redhat-ci.yml deleted file mode 100644 index 6dac7b256..000000000 --- a/.redhat-ci.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -cluster: -  hosts: -    - name: ocp-master -      distro: fedora/25/atomic -    - name: ocp-node1 -      distro: fedora/25/atomic -    - name: ocp-node2 -      distro: fedora/25/atomic -  container: -    image: fedora:25 - -packages: -  - gcc -  - python-pip -  - python-devel -  - openssl-devel -  - redhat-rpm-config - -context: 'fedora/25/atomic | origin/v3.6.0-alpha.1' - -env: -  OPENSHIFT_IMAGE_TAG: v3.6.0-alpha.1 - -tests: -  - ./.redhat-ci.sh - -artifacts: -  - journals/ diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible index 2f89bc67f..98e277c19 100644 --- a/.tito/packages/openshift-ansible +++ b/.tito/packages/openshift-ansible @@ -1 +1 @@ -3.6.109-1 ./ +3.6.117-1 ./ diff --git a/docs/pull_requests.md b/docs/pull_requests.md index fcc3e275c..45ae01a9d 100644 --- a/docs/pull_requests.md +++ b/docs/pull_requests.md @@ -10,8 +10,8 @@ Whenever a  [Pull Request is opened](../CONTRIBUTING.md#submitting-contributions), some  automated test jobs must be successfully run before the PR can be merged. -Some of these jobs are automatically triggered, e.g., Travis and Coveralls. -Other jobs need to be manually triggered by a member of the +Some of these jobs are automatically triggered, e.g., Travis, PAPR, and +Coveralls. 
Other jobs need to be manually triggered by a member of the  [Team OpenShift Ansible Contributors](https://github.com/orgs/openshift/teams/team-openshift-ansible-contributors).  ## Triggering tests @@ -48,9 +48,9 @@ simplifying the workflow towards a single infrastructure in the future.  There are a set of tests that run on Fedora infrastructure. They are started  automatically with every pull request. -They are implemented using the [`redhat-ci` framework](https://github.com/jlebon/redhat-ci). +They are implemented using the [`PAPR` framework](https://github.com/projectatomic/papr). -To re-run tests, write a comment containing `bot, retest this please`. +To re-run tests, write a comment containing only `bot, retest this please`.  ## Triggering merge diff --git a/docs/repo_structure.md b/docs/repo_structure.md index 693837fba..f598f22c3 100644 --- a/docs/repo_structure.md +++ b/docs/repo_structure.md @@ -52,3 +52,16 @@ These are plugins used in playbooks and roles:  .  └── test                Contains tests.  ``` + +### CI + +These files are used by [PAPR](https://github.com/projectatomic/papr), +It is very similar in workflow to Travis, with the test +environment and test scripts defined in a YAML file. + +``` +. +├── .papr.yml +├── .papr.sh +└── .papr.inventory +``` diff --git a/images/installer/system-container/README.md b/images/installer/system-container/README.md index dc95307e5..fbcd47c4a 100644 --- a/images/installer/system-container/README.md +++ b/images/installer/system-container/README.md @@ -11,3 +11,21 @@ These files are needed to run the installer using an [Atomic System container](h  * service.template - Template file for the systemd service.  * tmpfiles.template - Template file for systemd-tmpfiles. + +## Options + +These options may be set via the ``atomic`` ``--set`` flag. For defaults see ``root/exports/manifest.json`` + +* OPTS - Additional options to pass to ansible when running the installer + +* VAR_LIB_OPENSHIFT_INSTALLER - Full path of the installer code to mount into the container + +* VAR_LOG_OPENSHIFT_LOG - Full path of the log file to mount into the container + +* PLAYBOOK_FILE - Full path of the playbook inside the container + +* HOME_ROOT - Full path on host to mount as the root home directory inside the container (for .ssh/, etc..) 
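These defaults (together with the ANSIBLE_CONFIG and INVENTORY_FILE options listed next) can be overridden when installing the system container. A minimal sketch, assuming the ``atomic`` CLI and an illustrative image name and inventory path:

```
# Install the openshift-ansible system container, overriding selected
# manifest.json defaults via --set. The image name and inventory path are
# examples only; adjust them to the image and inventory actually in use.
atomic install --system \
    --set INVENTORY_FILE=/root/inventory \
    --set PLAYBOOK_FILE=/usr/share/ansible/openshift-ansible/playbooks/byo/config.yml \
    --set ANSIBLE_CONFIG=/usr/share/ansible/openshift-ansible/ansible.cfg \
    docker.io/openshift/origin-ansible:latest
```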
+ +* ANSIBLE_CONFIG - Full path for the ansible configuration file to use inside the container + +* INVENTORY_FILE - Full path for the inventory to use from the host diff --git a/images/installer/system-container/root/exports/config.json.template b/images/installer/system-container/root/exports/config.json.template index 397ac941a..739c0080f 100644 --- a/images/installer/system-container/root/exports/config.json.template +++ b/images/installer/system-container/root/exports/config.json.template @@ -21,7 +21,8 @@              "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",              "TERM=xterm",              "OPTS=$OPTS", -            "PLAYBOOK_FILE=$PLAYBOOK_FILE" +            "PLAYBOOK_FILE=$PLAYBOOK_FILE", +            "ANSIBLE_CONFIG=$ANSIBLE_CONFIG"          ],          "cwd": "/opt/app-root/src/",          "rlimits": [ diff --git a/images/installer/system-container/root/exports/manifest.json b/images/installer/system-container/root/exports/manifest.json index f735494d4..321a84ee8 100644 --- a/images/installer/system-container/root/exports/manifest.json +++ b/images/installer/system-container/root/exports/manifest.json @@ -6,6 +6,7 @@          "VAR_LOG_OPENSHIFT_LOG": "/var/log/ansible.log",          "PLAYBOOK_FILE": "/usr/share/ansible/openshift-ansible/playbooks/byo/config.yml",  	"HOME_ROOT": "/root", +	"ANSIBLE_CONFIG": "/usr/share/ansible/openshift-ansible/ansible.cfg",          "INVENTORY_FILE": "/dev/null"      }  } diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 86b4de4b7..962a01a91 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -191,6 +191,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',  # or  #openshift_master_request_header_ca_file=<path to local ca file to use> +# CloudForms Management Engine (ManageIQ) App Install +# +# Enables installation of MIQ server. Recommended for dedicated +# clusters only. See roles/openshift_cfme/README.md for instructions +# and requirements. +#openshift_cfme_install_app=False +  # Cloud Provider Configuration  #  # Note: You may make use of environment variables rather than store @@ -786,6 +793,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',  # Enable API service auditing, available as of 1.3  #openshift_master_audit_config={"enabled": true} +# +# In case you want more advanced setup for the auditlog you can +# use this line. +# The directory in "auditFilePath" will be created if it's not +# exist +#openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5}}  # Enable origin repos that point at Centos PAAS SIG, defaults to true, only used  # by deployment_type=origin diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index cbaf22810..63f1f00d2 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -190,6 +190,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',  # or  #openshift_master_request_header_ca_file=<path to local ca file to use> +# CloudForms Management Engine (ManageIQ) App Install +# +# Enables installation of MIQ server. Recommended for dedicated +# clusters only. See roles/openshift_cfme/README.md for instructions +# and requirements. 
+#openshift_cfme_install_app=False +  # Cloud Provider Configuration  #  # Note: You may make use of environment variables rather than store @@ -786,6 +793,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',  # Enable API service auditing, available as of 3.2  #openshift_master_audit_config={"enabled": true} +# +# In case you want more advanced setup for the auditlog you can +# use this line. +# The directory in "auditFilePath" will be created if it's not +# exist +#openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5}}  # Validity of the auto-generated OpenShift certificates in days.  # See also openshift_hosted_registry_cert_expire_days above. diff --git a/openshift-ansible.spec b/openshift-ansible.spec index ec7429291..7b5587294 100644 --- a/openshift-ansible.spec +++ b/openshift-ansible.spec @@ -9,7 +9,7 @@  %global __requires_exclude ^/usr/bin/ansible-playbook$  Name:           openshift-ansible -Version:        3.6.109 +Version:        3.6.117  Release:        1%{?dist}  Summary:        Openshift and Atomic Enterprise Ansible  License:        ASL 2.0 @@ -280,6 +280,84 @@ Atomic OpenShift Utilities includes  %changelog +* Mon Jun 19 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.117-1 +- Run storage upgrade pre and post master upgrade (rteague@redhat.com) +- Introduce etcd migrate role (jchaloup@redhat.com) +- Add support for rhel, aci, vxlan (srampal@cisco.com) + +* Sun Jun 18 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.116-1 +- PAPR: define openshift_image_tag via command line (rhcarvalho@gmail.com) +- Ensure only one ES pod per PV (peter.portante@redhat.com) +- etcd v3 for clean installs (sdodson@redhat.com) +- Rename cockpit-shell -> cockpit-system (rhcarvalho@gmail.com) +- Update image repo name, images have been moved from 'cloudforms' to +  'cloudforms42' for CF 4.2. (simaishi@redhat.com) +- Update image repo name, images have been moved from 'cloudforms' to +  'cloudforms45' for CF 4.5. (simaishi@redhat.com) +- CloudForms 4.5 templates (simaishi@redhat.com) + +* Fri Jun 16 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.114-1 +-  + +* Fri Jun 16 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.113-1 +- Make rollout status check best-effort, add poll (skuznets@redhat.com) +- Verify the rollout status of the hosted router and registry +  (skuznets@redhat.com) +- fix es routes for new logging roles (rmeggins@redhat.com) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.112-1 +- Add the the other featured audit-config paramters as example (al- +  git001@none.at) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.111-1 +- doc: Info for system container installer options (smilner@redhat.com) +- Add ANSIBLE_CONFIG to system container installer (smilner@redhat.com) +- Add missing file. Remove debugging prompt. (tbielawa@redhat.com) +- Update readme one last time (tbielawa@redhat.com) +- Reconfigure masters in serial to avoid HA meltdowns (tbielawa@redhat.com) +- First POC of a CFME turnkey solution in openshift-anisble +  (tbielawa@redhat.com) +- Reverted most of this pr 4356 except:   adding +  openshift_logging_fluentd_buffer_queue_limit: 1024 +  openshift_logging_fluentd_buffer_size_limit: 1m +  openshift_logging_mux_buffer_queue_limit: 1024 +  openshift_logging_mux_buffer_size_limit: 1m   and setting the matched +  environment variables. 
(nhosoi@redhat.com) +- Adding the defaults for openshift_logging_fluentd_{cpu,memory}_limit to +  roles/openshift_logging_fluentd/defaults/main.yml. (nhosoi@redhat.com) +- Adding environment variables FLUENTD_CPU_LIMIT, FLUENTD_MEMORY_LIMIT, +  MUX_CPU_LIMIT, MUX_MEMORY_LIMIT. (nhosoi@redhat.com) +- Introducing fluentd/mux buffer_queue_limit, buffer_size_limit, cpu_limit, and +  memory_limit. (nhosoi@redhat.com) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.110-1 +- papr: add documentation to YAML and simplify context (jlebon@redhat.com) +- docs: better documentation for PAPR (jlebon@redhat.com) +- papr: install libffi-devel (jlebon@redhat.com) +- pre-install checks: add more during byo install (lmeyer@redhat.com) +- move etcd backup to etcd_common role (jchaloup@redhat.com) +- Support installing HOSA via ansible (mwringe@redhat.com) +- GlusterFS: Remove requirement for heketi-cli (jarrpa@redhat.com) +- GlusterFS: Fix bugs in wipe (jarrpa@redhat.com) +- GlusterFS: Skip heketi-cli install on Atomic (jarrpa@redhat.com) +- GlusterFS: Create a StorageClass if specified (jarrpa@redhat.com) +- GlusterFS: Use proper secrets (jarrpa@redhat.com) +- GlusterFS: Allow cleaner separation of multiple clusters (jarrpa@redhat.com) +- GlusterFS: Minor corrections and cleanups (jarrpa@redhat.com) +- GlusterFS: Improve documentation (jarrpa@redhat.com) +- GlusterFS: Allow configuration of kube namespace for heketi +  (jarrpa@redhat.com) +- GlusterFS: Adjust when clauses for registry config (jarrpa@redhat.com) +- GlusterFS: Allow failure reporting when deleting deploy-heketi +  (jarrpa@redhat.com) +- GlusterFS: Tweak pod probe parameters (jarrpa@redhat.com) +- GlusterFS: Allow for configuration of node selector (jarrpa@redhat.com) +- GlusterFS: Label on Openshift node name (jarrpa@redhat.com) +- GlusterFS: Make sure timeout is an int (jarrpa@redhat.com) +- GlusterFS: Use groups variables (jarrpa@redhat.com) +- papr: rename redhat-ci related files to papr (jlebon@redhat.com) +- singletonize some role tasks that repeat a lot (lmeyer@redhat.com) +  * Wed Jun 14 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.109-1  -  diff --git a/playbooks/adhoc/contiv/delete_contiv.yml b/playbooks/adhoc/contiv/delete_contiv.yml index 91948c72e..eec6c23a7 100644 --- a/playbooks/adhoc/contiv/delete_contiv.yml +++ b/playbooks/adhoc/contiv/delete_contiv.yml @@ -1,5 +1,5 @@  --- -- name: delete contiv +- name: Uninstall contiv    hosts: all    gather_facts: False    tasks: diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 97d835eae..27c3a9edd 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -103,7 +103,7 @@          - atomic-openshift-sdn-ovs          - cockpit-bridge          - cockpit-docker -        - cockpit-shell +        - cockpit-system          - cockpit-ws          - kubernetes-client          - openshift @@ -346,7 +346,7 @@      - atomic-openshift-master      - cockpit-bridge      - cockpit-docker -    - cockpit-shell +    - cockpit-system      - cockpit-ws      - corosync      - kubernetes-client diff --git a/playbooks/byo/openshift-cfme/config.yml b/playbooks/byo/openshift-cfme/config.yml new file mode 100644 index 000000000..0e8e7a94d --- /dev/null +++ b/playbooks/byo/openshift-cfme/config.yml @@ -0,0 +1,8 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml +  tags: +    - always + +- include: ../../common/openshift-cluster/evaluate_groups.yml + +- include: ../../common/openshift-cfme/config.yml diff --git 
a/playbooks/byo/openshift-cfme/uninstall.yml b/playbooks/byo/openshift-cfme/uninstall.yml new file mode 100644 index 000000000..c8ed16859 --- /dev/null +++ b/playbooks/byo/openshift-cfme/uninstall.yml @@ -0,0 +1,6 @@ +--- +# - include: ../openshift-cluster/initialize_groups.yml +#   tags: +#     - always + +- include: ../../common/openshift-cfme/uninstall.yml diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index fd4a9eb26..2372a5322 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -15,6 +15,11 @@        checks:        - disk_availability        - memory_availability +      - package_availability +      - package_update +      - package_version +      - docker_image_availability +      - docker_storage  - include: ../../common/openshift-cluster/std_include.yml    tags: diff --git a/playbooks/byo/openshift-etcd/migrate.yml b/playbooks/byo/openshift-etcd/migrate.yml new file mode 100644 index 000000000..fd02e066e --- /dev/null +++ b/playbooks/byo/openshift-etcd/migrate.yml @@ -0,0 +1,124 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml +  tags: +  - always + +- include: ../../common/openshift-cluster/evaluate_groups.yml +  tags: +  - always + +- name: Run pre-checks +  hosts: oo_etcd_to_config +  tags: +  - always +  roles: +  - role: etcd_migrate +    r_etcd_migrate_action: check +    etcd_peer: "{{ ansible_default_ipv4.address }}" + +# TODO(jchaloup): replace the std_include with something minimal so the entire playbook is faster +# e.g. I don't need to detect the OCP version, install deps, etc. +- include: ../../common/openshift-cluster/std_include.yml +  tags: +  - always + +- name: Backup v2 data +  hosts: oo_etcd_to_config +  gather_facts: no +  tags: +  - always +  roles: +  - role: openshift_facts +  - role: etcd_common +    r_etcd_common_action: backup +    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" +    r_etcd_common_backup_tag: pre-migration +    r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" +    r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + +- name: Gate on etcd backup +  hosts: localhost +  connection: local +  become: no +  tasks: +  - set_fact: +      etcd_backup_completed: "{{ hostvars +                                 | oo_select_keys(groups.oo_etcd_to_config) +                                 | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}" +  - set_fact: +      etcd_backup_failed: "{{ groups.oo_etcd_to_config | difference(etcd_backup_completed) }}" +  - fail: +      msg: "Migration cannot continue. 
The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" +    when: +    - etcd_backup_failed | length > 0 + +- name: Prepare masters for etcd data migration +  hosts: oo_masters_to_config +  tasks: +  - set_fact: +      master_services: +      - "{{ openshift.common.service_type + '-master' }}" +  - set_fact: +      master_services: +      - "{{ openshift.common.service_type + '-master-controllers' }}" +      - "{{ openshift.common.service_type + '-master-api' }}" +    when: +    - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool +  - debug: +      msg: "master service name: {{ master_services }}" +  - name: Stop masters +    service: +      name: "{{ item }}" +      state: stopped +    with_items: "{{ master_services }}" + +- name: Migrate etcd data from v2 to v3 +  hosts: oo_etcd_to_config +  gather_facts: no +  tags: +  - always +  roles: +  - role: etcd_migrate +    r_etcd_migrate_action: migrate +    etcd_peer: "{{ ansible_default_ipv4.address }}" + +- name: Gate on etcd migration +  hosts: oo_masters_to_config +  gather_facts: no +  tasks: +  - set_fact: +      etcd_migration_completed: "{{ hostvars +                                 | oo_select_keys(groups.oo_etcd_to_config) +                                 | oo_collect('inventory_hostname', {'r_etcd_migrate_success': true}) }}" +  - set_fact: +      etcd_migration_failed: "{{ groups.oo_etcd_to_config | difference(etcd_migration_completed) }}" + +- name: Configure masters if etcd data migration is succesfull +  hosts: oo_masters_to_config +  roles: +  - role: etcd_migrate +    r_etcd_migrate_action: configure +    when: etcd_migration_failed | length == 0 +  tasks: +  - debug: +      msg: "Skipping master re-configuration since migration failed." +    when: +    - etcd_migration_failed | length > 0 + +- name: Start masters after etcd data migration +  hosts: oo_masters_to_config +  tasks: +  - name: Start master services +    service: +      name: "{{ item }}" +      state: started +    register: service_status +    # Sometimes the master-api, resp. master-controllers fails to start for the first time +    until: service_status.state is defined and service_status.state == "started" +    retries: 5 +    delay: 10 +    with_items: "{{ master_services[::-1] }}" +  - fail: +      msg: "Migration failed. The following hosts were not properly migrated: {{ etcd_migration_failed | join(',') }}" +    when: +    - etcd_migration_failed | length > 0 diff --git a/playbooks/common/openshift-cfme/config.yml b/playbooks/common/openshift-cfme/config.yml new file mode 100644 index 000000000..533a35d9e --- /dev/null +++ b/playbooks/common/openshift-cfme/config.yml @@ -0,0 +1,44 @@ +--- +# TODO: Make this work. The 'name' variable below is undefined +# presently because it's part of the cfme role. This play can't run +# until that's re-worked. +# +# - name: Pre-Pull manageiq-pods docker images +#   hosts: nodes +#   tasks: +#   - name: Ensure the latest manageiq-pods docker image is pulling +#     docker_image: +#       name: "{{ openshift_cfme_container_image }}" +#     # Fire-and-forget method, never timeout +#     async: 99999999999 +#     # F-a-f, never check on this. True 'background' task. 
+#     poll: 0 + +- name: Configure Masters for CFME Bulk Image Imports +  hosts: oo_masters_to_config +  serial: 1 +  tasks: +  - name: Run master cfme tuning playbook +    include_role: +      name: openshift_cfme +      tasks_from: tune_masters + +- name: Setup CFME +  hosts: oo_first_master +  vars: +    r_openshift_cfme_miq_template_content: "{{ lookup('file', 'roles/openshift_cfme/files/miq-template.yaml') | from_yaml}}" +  pre_tasks: +  - name: Create a temporary place to evaluate the PV templates +    command: mktemp -d /tmp/openshift-ansible-XXXXXXX +    register: r_openshift_cfme_mktemp +    changed_when: false +  - name: Ensure the server template was read from disk +    debug: +      msg="{{ r_openshift_cfme_miq_template_content | from_yaml }}" + +  tasks: +  - name: Run the CFME Setup Role +    include_role: +      name: openshift_cfme +    vars: +      template_dir: "{{ hostvars[groups.masters.0].r_openshift_cfme_mktemp.stdout }}" diff --git a/playbooks/common/openshift-cfme/filter_plugins b/playbooks/common/openshift-cfme/filter_plugins new file mode 120000 index 000000000..99a95e4ca --- /dev/null +++ b/playbooks/common/openshift-cfme/filter_plugins @@ -0,0 +1 @@ +../../../filter_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/library b/playbooks/common/openshift-cfme/library new file mode 120000 index 000000000..ba40d2f56 --- /dev/null +++ b/playbooks/common/openshift-cfme/library @@ -0,0 +1 @@ +../../../library
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/roles b/playbooks/common/openshift-cfme/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/common/openshift-cfme/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/uninstall.yml b/playbooks/common/openshift-cfme/uninstall.yml new file mode 100644 index 000000000..78b8e7668 --- /dev/null +++ b/playbooks/common/openshift-cfme/uninstall.yml @@ -0,0 +1,8 @@ +--- +- name: Uninstall CFME +  hosts: masters +  tasks: +  - name: Run the CFME Uninstall Role Tasks +    include_role: +      name: openshift_cfme +      tasks_from: uninstall diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index b980909eb..5c19df4c5 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -3,6 +3,16 @@  # Upgrade Masters  ############################################################################### +# oc adm migrate storage should be run prior to etcd v3 upgrade +# See: https://github.com/openshift/origin/pull/14625#issuecomment-308467060 +- name: Pre master upgrade - Upgrade job storage +  hosts: oo_first_master +  tasks: +  - name: Upgrade job storage +    command: > +      {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig +      migrate storage --include=jobs --confirm +  # If facts cache were for some reason deleted, this fact may not be set, and if not set  # it will always default to true. This causes problems for the etcd data dir fact detection  # so we must first make sure this is set correctly before attempting the backup. @@ -133,6 +143,14 @@    - set_fact:        master_update_complete: True +- name: Post master upgrade - Upgrade job storage +  hosts: oo_first_master +  tasks: +  - name: Upgrade job storage +    command: > +      {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig +      migrate storage --include=jobs --confirm +  ##############################################################################  # Gate on master update complete  ############################################################################## diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml deleted file mode 100644 index 48c69eccd..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -############################################################################### -# Post upgrade - Upgrade job storage -############################################################################### -- name: Upgrade job storage -  hosts: oo_first_master -  roles: -  - { role: openshift_cli } -  vars: -    # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe -    # restart. 
-    skip_docker_role: True -  tasks: -  - name: Upgrade job storage -    command: > -      {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig -      migrate storage --include=jobs --confirm -    run_once: true diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml index e63b03e51..4e7c14e94 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml @@ -115,5 +115,3 @@  - include: ../upgrade_nodes.yml  - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml index 74c2964aa..45b664d06 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml @@ -119,5 +119,3 @@      master_config_hook: "v3_5/master_config_upgrade.yml"  - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/storage_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/storage_upgrade.yml deleted file mode 100644 index 48c69eccd..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/storage_upgrade.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -############################################################################### -# Post upgrade - Upgrade job storage -############################################################################### -- name: Upgrade job storage -  hosts: oo_first_master -  roles: -  - { role: openshift_cli } -  vars: -    # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe -    # restart. 
-    skip_docker_role: True -  tasks: -  - name: Upgrade job storage -    command: > -      {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig -      migrate storage --include=jobs --confirm -    run_once: true diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml index 5d41b84d0..5b9ac9e8f 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml @@ -115,5 +115,3 @@  - include: ../upgrade_nodes.yml  - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml index a66fb51ff..a470c7595 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml @@ -119,5 +119,3 @@      master_config_hook: "v3_6/master_config_upgrade.yml"  - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index ddc4db8f8..429460b2c 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -20,6 +20,15 @@      - node      - .config_managed +  - name: Check for existing configuration +    stat: +      path: /etc/origin/master/master-config.yaml +    register: master_config_stat + +  - name: Set clean install fact +    set_fact: +      l_clean_install: "{{ not master_config_stat.stat.exists }}" +    - set_fact:        openshift_master_pod_eviction_timeout: "{{ lookup('oo_option', 'openshift_master_pod_eviction_timeout') | default(none, true) }}"      when: openshift_master_pod_eviction_timeout is not defined @@ -122,6 +131,7 @@      etcd_cert_subdir: "openshift-master-{{ openshift.common.hostname }}"      etcd_cert_config_dir: "{{ openshift.common.config_base }}/master"      etcd_cert_prefix: "master.etcd-" +    r_openshift_master_clean_install: hostvars[groups.oo_first_master.0].l_clean_install    - role: nuage_master      when: openshift.common.use_nuage | bool    - role: calico_master diff --git a/roles/cockpit/tasks/main.yml b/roles/cockpit/tasks/main.yml index bddad778f..57f49ea11 100644 --- a/roles/cockpit/tasks/main.yml +++ b/roles/cockpit/tasks/main.yml @@ -3,7 +3,7 @@    package: name={{ item }} state=present    with_items:      - cockpit-ws -    - cockpit-shell +    - cockpit-system      - cockpit-bridge      - cockpit-docker      - "{{ cockpit_plugins }}" diff --git a/roles/contiv/defaults/main.yml b/roles/contiv/defaults/main.yml index 1ccae61f2..8c4d19537 100644 --- a/roles/contiv/defaults/main.yml +++ b/roles/contiv/defaults/main.yml @@ -1,12 +1,12 @@  ---  # The version of Contiv binaries to use -contiv_version: 1.0.0-beta.3-02-21-2017.20-52-42.UTC +contiv_version: 1.0.1  # The version of cni binaries  cni_version: v0.4.0 -contiv_default_subnet: "20.1.1.1/24" -contiv_default_gw: "20.1.1.254" +contiv_default_subnet: "10.128.0.0/16" +contiv_default_gw: "10.128.254.254"  # TCP port that Netmaster listens for network connections  netmaster_port: 9999 @@ -69,6 +69,9 @@ netplugin_fwd_mode: bridge  # Contiv fabric mode aci|default  contiv_fabric_mode: default +# Global VLAN range +contiv_vlan_range: "2900-3000" +  # Encapsulation type 
vlan|vxlan to use for instantiating container networks  contiv_encap_mode: vlan @@ -78,8 +81,8 @@ netplugin_driver: ovs  # Create a default Contiv network for use by pods  contiv_default_network: true -# VLAN/ VXLAN tag value to be used for the default network -contiv_default_network_tag: 1 +# Statically configured tag for default network (if needed) +contiv_default_network_tag: ""  #SRFIXME (use the openshift variables)  https_proxy: "" @@ -95,6 +98,9 @@ apic_leaf_nodes: ""  apic_phys_dom: ""  apic_contracts_unrestricted_mode: no  apic_epg_bridge_domain: not_specified +apic_configure_default_policy: false +apic_default_external_contract: "uni/tn-common/brc-default" +apic_default_app_profile: "contiv-infra-app-profile"  is_atomic: False  kube_cert_dir: "/data/src/github.com/openshift/origin/openshift.local.config/master"  master_name: "{{ groups['masters'][0] }}" @@ -104,3 +110,12 @@ kube_ca_cert: "{{ kube_cert_dir }}/ca.crt"  kube_key: "{{ kube_cert_dir }}/admin.key"  kube_cert: "{{ kube_cert_dir }}/admin.crt"  kube_master_api_port: 8443 + +# contivh1 default subnet and gateway +#contiv_h1_subnet_default: "132.1.1.0/24" +#contiv_h1_gw_default: "132.1.1.1" +contiv_h1_subnet_default: "10.129.0.0/16" +contiv_h1_gw_default: "10.129.0.1" + +# contiv default private subnet for ext access +contiv_private_ext_subnet: "10.130.0.0/16" diff --git a/roles/contiv/meta/main.yml b/roles/contiv/meta/main.yml index 3223afb6e..da6409f1e 100644 --- a/roles/contiv/meta/main.yml +++ b/roles/contiv/meta/main.yml @@ -26,3 +26,5 @@ dependencies:    etcd_url_scheme: http    etcd_peer_url_scheme: http    when: contiv_role == "netmaster" +- role: contiv_auth_proxy +  when: (contiv_role == "netmaster") and (contiv_enable_auth_proxy == true) diff --git a/roles/contiv/tasks/default_network.yml b/roles/contiv/tasks/default_network.yml index 9cf98bb80..f679443e0 100644 --- a/roles/contiv/tasks/default_network.yml +++ b/roles/contiv/tasks/default_network.yml @@ -6,10 +6,53 @@    retries: 9    delay: 10 +- name: Contiv | Set globals +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" global set --fabric-mode {{ contiv_fabric_mode }} --vlan-range {{ contiv_vlan_range }} --fwd-mode {{ netplugin_fwd_mode }} --private-subnet {{ contiv_private_ext_subnet }}' + +- name: Contiv | Set arp mode to flood if ACI +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" global set --arp-mode flood' +  when: contiv_fabric_mode == "aci" +  - name: Contiv | Check if default-net exists    command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net ls'    register: net_result  - name: Contiv | Create default-net -  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_default_subnet }} -e {{ contiv_encap_mode }} -p {{ contiv_default_network_tag }} --gateway={{ contiv_default_gw }} default-net' +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_default_subnet }} -e {{ contiv_encap_mode }} -p {{ contiv_default_network_tag }} --gateway {{ contiv_default_gw }} default-net'    when: net_result.stdout.find("default-net") == -1 + +- name: Contiv | Create host access infra network for VxLan routing case +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_h1_subnet_default }} --gateway={{ contiv_h1_gw_default }} --nw-type="infra" contivh1' +  when: (contiv_encap_mode == 
"vxlan") and (netplugin_fwd_mode == "routing") + +#- name: Contiv | Create an allow-all policy for the default-group +#  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy create ose-allow-all-policy' +#  when: contiv_fabric_mode == "aci" + +- name: Contiv | Set up aci external contract to consume default external contract +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" external-contracts create -c -a {{ apic_default_external_contract }} oseExtToConsume' +  when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +- name: Contiv | Set up aci external contract to provide default external contract +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" external-contracts create -p -a {{ apic_default_external_contract }} oseExtToProvide' +  when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +- name: Contiv | Create aci default-group +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" group create default-net default-group' +  when: contiv_fabric_mode == "aci" + +- name: Contiv | Add external contracts to the default-group +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" group create -e oseExtToConsume -e oseExtToProvide default-net default-group' +  when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +#- name: Contiv | Add policy rule 1 for allow-all policy +#  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy rule-add -d in --action allow ose-allow-all-policy 1' +#  when: contiv_fabric_mode == "aci" + +#- name: Contiv | Add policy rule 2 for allow-all policy +#  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy rule-add -d out --action allow ose-allow-all-policy 2' +#  when: contiv_fabric_mode == "aci" + +- name: Contiv | Create default aci app profile +  command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" app-profile create -g default-group {{ apic_default_app_profile }}' +  when: contiv_fabric_mode == "aci" diff --git a/roles/contiv/tasks/netmaster.yml b/roles/contiv/tasks/netmaster.yml index 5057767b8..acaf7386e 100644 --- a/roles/contiv/tasks/netmaster.yml +++ b/roles/contiv/tasks/netmaster.yml @@ -23,7 +23,7 @@      line: "{{ hostvars[item]['ansible_' + netmaster_interface].ipv4.address }} netmaster"      state: present    when: hostvars[item]['ansible_' + netmaster_interface].ipv4.address is defined -  with_items: groups['masters'] +  with_items: "{{ groups['masters'] }}"  - name: Netmaster | Create netmaster symlinks    file: diff --git a/roles/contiv/tasks/netplugin_iptables.yml b/roles/contiv/tasks/netplugin_iptables.yml index 8c348ac67..184c595c5 100644 --- a/roles/contiv/tasks/netplugin_iptables.yml +++ b/roles/contiv/tasks/netplugin_iptables.yml @@ -23,7 +23,36 @@    notify: Save iptables rules  - name: Netplugin IPtables | Open vxlan port with iptables -  command: /sbin/iptables -I INPUT 1 -p udp --dport 8472 -j ACCEPT -m comment --comment "vxlan" +  command: /sbin/iptables -I INPUT 1 -p udp --dport 8472 -j ACCEPT -m comment --comment "netplugin vxlan 8472" +  when: iptablesrules.stdout.find("netplugin vxlan 8472") == -1 +  notify: Save iptables rules  - name: Netplugin IPtables | Open vxlan port with iptables -  command: /sbin/iptables -I INPUT 1 -p udp --dport 4789 -j ACCEPT -m comment --comment 
"vxlan" +  command: /sbin/iptables -I INPUT 1 -p udp --dport 4789 -j ACCEPT -m comment --comment "netplugin vxlan 4789" +  when: iptablesrules.stdout.find("netplugin vxlan 4789") == -1 +  notify: Save iptables rules + +- name: Netplugin IPtables | Allow from contivh0 +  command: /sbin/iptables -I FORWARD 1 -i contivh0 -j ACCEPT -m comment --comment "contivh0 FORWARD input" +  when: iptablesrules.stdout.find("contivh0 FORWARD input") == -1 +  notify: Save iptables rules + +- name: Netplugin IPtables | Allow to contivh0 +  command: /sbin/iptables -I FORWARD 1 -o contivh0 -j ACCEPT -m comment --comment "contivh0 FORWARD output" +  when: iptablesrules.stdout.find("contivh0 FORWARD output") == -1 +  notify: Save iptables rules + +- name: Netplugin IPtables | Allow from contivh1 +  command: /sbin/iptables -I FORWARD 1 -i contivh1 -j ACCEPT -m comment --comment "contivh1 FORWARD input" +  when: iptablesrules.stdout.find("contivh1 FORWARD input") == -1 +  notify: Save iptables rules + +- name: Netplugin IPtables | Allow to contivh1 +  command: /sbin/iptables -I FORWARD 1 -o contivh1 -j ACCEPT -m comment --comment "contivh1 FORWARD output" +  when: iptablesrules.stdout.find("contivh1 FORWARD output") == -1 +  notify: Save iptables rules + +- name: Netplugin IPtables | Allow dns +  command: /sbin/iptables -I INPUT 1 -p udp --dport 53 -j ACCEPT -m comment --comment "contiv dns" +  when: iptablesrules.stdout.find("contiv dns") == -1 +  notify: Save iptables rules diff --git a/roles/contiv/tasks/packageManagerInstall.yml b/roles/contiv/tasks/packageManagerInstall.yml index 2eff1b85f..e0d48e643 100644 --- a/roles/contiv/tasks/packageManagerInstall.yml +++ b/roles/contiv/tasks/packageManagerInstall.yml @@ -4,9 +4,10 @@      did_install: false  - include: pkgMgrInstallers/centos-install.yml -  when: ansible_distribution == "CentOS" and not is_atomic +  when: (ansible_os_family == "RedHat") and +        not is_atomic  - name: Package Manager | Set fact saying we did CentOS package install    set_fact:      did_install: true -  when: ansible_distribution == "CentOS" +  when: (ansible_os_family == "RedHat") diff --git a/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml b/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml index 51c3d35ac..91e6aadf3 100644 --- a/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml +++ b/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml @@ -1,13 +1,13 @@  --- -- name: PkgMgr CentOS | Install net-tools pkg for route +- name: PkgMgr RHEL/CentOS | Install net-tools pkg for route    yum:      pkg=net-tools      state=latest -- name: PkgMgr CentOS | Get openstack kilo rpm +- name: PkgMgr RHEL/CentOS | Get openstack ocata rpm    get_url: -    url: https://repos.fedorapeople.org/repos/openstack/openstack-kilo/rdo-release-kilo-2.noarch.rpm -    dest: /tmp/rdo-release-kilo-2.noarch.rpm +    url: https://repos.fedorapeople.org/repos/openstack/openstack-ocata/rdo-release-ocata-2.noarch.rpm +    dest: /tmp/rdo-release-ocata-2.noarch.rpm      validate_certs: False    environment:      http_proxy: "{{ http_proxy|default('') }}" @@ -16,15 +16,15 @@    tags:      - ovs_install -- name: PkgMgr CentOS | Install openstack kilo rpm -  yum: name=/tmp/rdo-release-kilo-2.noarch.rpm state=present +- name: PkgMgr RHEL/CentOS | Install openstack ocata rpm +  yum: name=/tmp/rdo-release-ocata-2.noarch.rpm state=present    tags:      - ovs_install -- name: PkgMgr CentOS | Install ovs +- name: PkgMgr RHEL/CentOS | Install ovs    yum: -    pkg=openvswitch -    state=latest +    
pkg=openvswitch-2.5.0-2.el7.x86_64 +    state=present    environment:      http_proxy: "{{ http_proxy|default('') }}"      https_proxy: "{{ https_proxy|default('') }}" diff --git a/roles/contiv/templates/netplugin.j2 b/roles/contiv/templates/netplugin.j2 index f3d26c037..a4928cc3d 100644 --- a/roles/contiv/templates/netplugin.j2 +++ b/roles/contiv/templates/netplugin.j2 @@ -1,9 +1,7 @@  {% if contiv_encap_mode == "vlan" %}  NETPLUGIN_ARGS='-vlan-if {{ netplugin_interface }} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}'  {% endif %} -{#   Note: Commenting out vxlan encap mode support until it is fully supported  {% if contiv_encap_mode == "vxlan" %} -NETPLUGIN_ARGS='-vtep-ip {{ netplugin_ctrl_ip }} -e {{contiv_encap_mode}} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' +NETPLUGIN_ARGS='-vtep-ip {{ netplugin_ctrl_ip }} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}'  {% endif %} -#} diff --git a/roles/contiv_auth_proxy/README.md b/roles/contiv_auth_proxy/README.md new file mode 100644 index 000000000..287b6c148 --- /dev/null +++ b/roles/contiv_auth_proxy/README.md @@ -0,0 +1,29 @@ +Role Name +========= + +Role to install Contiv API Proxy and UI + +Requirements +------------ + +Docker needs to be installed to run the auth proxy container. + +Role Variables +-------------- + +auth_proxy_image specifies the image with version tag to be used to spin up the auth proxy container. +auth_proxy_cert, auth_proxy_key specify files to use for the proxy server certificates. +auth_proxy_port is the host port and auth_proxy_datastore the cluster data store address. + +Dependencies +------------ + +docker + +Example Playbook +---------------- + +- hosts: netplugin-node +  become: true +      roles: +        - { role: auth_proxy, auth_proxy_port: 10000, auth_proxy_datastore: etcd://netmaster:22379 } diff --git a/roles/contiv_auth_proxy/defaults/main.yml b/roles/contiv_auth_proxy/defaults/main.yml new file mode 100644 index 000000000..4e637a947 --- /dev/null +++ b/roles/contiv_auth_proxy/defaults/main.yml @@ -0,0 +1,11 @@ +--- +auth_proxy_image: "contiv/auth_proxy:1.0.0-beta.2" +auth_proxy_port: 10000 +contiv_certs: "/var/contiv/certs" +cluster_store: "{{ hostvars[groups['masters'][0]]['ansible_' + netmaster_interface].ipv4.address }}:22379" +auth_proxy_cert: "{{ contiv_certs }}/auth_proxy_cert.pem" +auth_proxy_key: "{{ contiv_certs }}/auth_proxy_key.pem" +auth_proxy_datastore: "{{ cluster_store }}" +auth_proxy_binaries: "/var/contiv_cache" +auth_proxy_local_install: False +auth_proxy_rule_comment: "Contiv auth proxy service" diff --git a/roles/contiv_auth_proxy/files/auth-proxy.service b/roles/contiv_auth_proxy/files/auth-proxy.service new file mode 100644 index 000000000..7cd2edff1 --- /dev/null +++ b/roles/contiv_auth_proxy/files/auth-proxy.service @@ -0,0 +1,13 @@ +[Unit] +Description=Contiv Proxy and UI +After=auditd.service systemd-user-sessions.service time-sync.target docker.service + +[Service] +ExecStart=/usr/bin/auth_proxy.sh start +ExecStop=/usr/bin/auth_proxy.sh stop +KillMode=control-group +Restart=on-failure +RestartSec=10 + +[Install] +WantedBy=multi-user.target diff --git a/roles/contiv_auth_proxy/handlers/main.yml b/roles/contiv_auth_proxy/handlers/main.yml new file mode 100644 index 000000000..9cb9bea49 --- /dev/null +++ b/roles/contiv_auth_proxy/handlers/main.yml @@ -0,0 +1,2 @@ +--- +# handlers file for auth_proxy diff --git 
a/roles/contiv_auth_proxy/tasks/cleanup.yml b/roles/contiv_auth_proxy/tasks/cleanup.yml new file mode 100644 index 000000000..a29659cc9 --- /dev/null +++ b/roles/contiv_auth_proxy/tasks/cleanup.yml @@ -0,0 +1,10 @@ +--- + +- name: stop auth-proxy container +  service: name=auth-proxy state=stopped + +- name: cleanup iptables for auth proxy +  shell: iptables -D INPUT -p tcp --dport {{ item }} -j ACCEPT -m comment --comment "{{ auth_proxy_rule_comment }} ({{ item }})" +  become: true +  with_items: +    - "{{ auth_proxy_port }}" diff --git a/roles/contiv_auth_proxy/tasks/main.yml b/roles/contiv_auth_proxy/tasks/main.yml new file mode 100644 index 000000000..74e7bf794 --- /dev/null +++ b/roles/contiv_auth_proxy/tasks/main.yml @@ -0,0 +1,37 @@ +--- +# tasks file for auth_proxy +- name: setup iptables for auth proxy +  shell: > +      ( iptables -L INPUT | grep "{{ auth_proxy_rule_comment }} ({{ item }})" ) || \ +      iptables -I INPUT 1 -p tcp --dport {{ item }} -j ACCEPT -m comment --comment "{{ auth_proxy_rule_comment }} ({{ item }})" +  become: true +  with_items: +    - "{{ auth_proxy_port }}" + +# Load the auth-proxy-image from local tar. Ignore any errors to handle the +# case where the image is not built in +- name: copy auth-proxy image +  copy: src={{ auth_proxy_binaries }}/auth-proxy-image.tar dest=/tmp/auth-proxy-image.tar +  when: auth_proxy_local_install == True + +- name: load auth-proxy image +  shell: docker load -i /tmp/auth-proxy-image.tar +  when: auth_proxy_local_install == True + +- name: create cert folder for proxy +  file: path=/var/contiv/certs state=directory + +- name: copy shell script for starting auth-proxy +  template: src=auth_proxy.j2 dest=/usr/bin/auth_proxy.sh mode=u=rwx,g=rx,o=rx + +- name: copy cert for starting auth-proxy +  copy: src=cert.pem dest=/var/contiv/certs/auth_proxy_cert.pem mode=u=rw,g=r,o=r + +- name: copy key for starting auth-proxy +  copy: src=key.pem dest=/var/contiv/certs/auth_proxy_key.pem mode=u=rw,g=r,o=r + +- name: copy systemd units for auth-proxy +  copy: src=auth-proxy.service dest=/etc/systemd/system/auth-proxy.service + +- name: start auth-proxy container +  systemd: name=auth-proxy daemon_reload=yes state=started enabled=yes diff --git a/roles/contiv_auth_proxy/templates/auth_proxy.j2 b/roles/contiv_auth_proxy/templates/auth_proxy.j2 new file mode 100644 index 000000000..e82e5b4ab --- /dev/null +++ b/roles/contiv_auth_proxy/templates/auth_proxy.j2 @@ -0,0 +1,36 @@ +#!/bin/bash + +usage="$0 start/stop" +if [ $# -ne 1 ]; then +    echo USAGE: $usage +    exit 1 +fi + +case $1 in +start) +    set -e + +    /usr/bin/docker run --rm \ +      -p 10000:{{ auth_proxy_port }} \ +      --net=host --name=auth-proxy \ +      -e NO_NETMASTER_STARTUP_CHECK=1 \ +      -v /var/contiv:/var/contiv \ +      {{ auth_proxy_image }} \ +      --tls-key-file={{ auth_proxy_key }} \ +      --tls-certificate={{ auth_proxy_cert }} \ +      --data-store-address={{ auth_proxy_datastore }} \ +      --netmaster-address={{ service_vip }}:9999 \ +      --listen-address=:10000  +    ;; + +stop) +    # don't stop on error +    /usr/bin/docker stop auth-proxy +    /usr/bin/docker rm -f -v  auth-proxy +    ;; + +*) +    echo USAGE: $usage +    exit 1 +    ;; +esac diff --git a/roles/contiv_auth_proxy/tests/inventory b/roles/contiv_auth_proxy/tests/inventory new file mode 100644 index 000000000..d18580b3c --- /dev/null +++ b/roles/contiv_auth_proxy/tests/inventory @@ -0,0 +1 @@ +localhost
\ No newline at end of file diff --git a/roles/contiv_auth_proxy/tests/test.yml b/roles/contiv_auth_proxy/tests/test.yml new file mode 100644 index 000000000..2af3250cd --- /dev/null +++ b/roles/contiv_auth_proxy/tests/test.yml @@ -0,0 +1,5 @@ +--- +- hosts: localhost +  remote_user: root +  roles: +    - auth_proxy diff --git a/roles/contiv_auth_proxy/vars/main.yml b/roles/contiv_auth_proxy/vars/main.yml new file mode 100644 index 000000000..9032766c4 --- /dev/null +++ b/roles/contiv_auth_proxy/vars/main.yml @@ -0,0 +1,2 @@ +--- +# vars file for auth_proxy diff --git a/roles/contiv_facts/defaults/main.yaml b/roles/contiv_facts/defaults/main.yaml index a6c08fa63..7b8150954 100644 --- a/roles/contiv_facts/defaults/main.yaml +++ b/roles/contiv_facts/defaults/main.yaml @@ -8,3 +8,6 @@ bin_dir: /usr/bin  ansible_temp_dir: /tmp/.ansible/files  source_type: packageManager + +# Whether or not to also install and enable the Contiv auth_proxy +contiv_enable_auth_proxy: false diff --git a/roles/etcd_migrate/README.md b/roles/etcd_migrate/README.md new file mode 100644 index 000000000..369e78ff2 --- /dev/null +++ b/roles/etcd_migrate/README.md @@ -0,0 +1,53 @@ +Role Name +========= + +Offline etcd migration of data from v2 to v3 + +Requirements +------------ + +It is expected all consumers of the etcd data are not accessing the data. +Otherwise the migrated data can be out-of-sync with the v2 and can result in unhealthy etcd cluster. + +The role itself is responsible for: +- checking etcd cluster health and raft status before the migration +- checking of presence of any v3 data (in that case the migration is stopped) +- migration of v2 data to v3 data (including attaching leases of keys prefixed with "/kubernetes.io/events" and "/kubernetes.io/masterleases" string) +- validation of migrated data (all v2 keys and in v3 keys and are set to the identical value) + +The migration itself requires an etcd member to be down in the process. Once the migration is done, the etcd member is started. + +Role Variables +-------------- + +TBD + +Dependencies +------------ + +- etcd_common +- lib_utils + +Example Playbook +---------------- + +```yaml +- name: Migrate etcd data from v2 to v3 +  hosts: oo_etcd_to_config +  gather_facts: no +  tasks: +  - include_role: +      name: openshift_etcd_migrate +    vars: +      etcd_peer: "{{ ansible_default_ipv4.address }}" +``` + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +Jan Chaloupka (jchaloup@redhat.com) diff --git a/roles/etcd_migrate/defaults/main.yml b/roles/etcd_migrate/defaults/main.yml new file mode 100644 index 000000000..05cf41fbb --- /dev/null +++ b/roles/etcd_migrate/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# Default action when calling this role, choices: check, migrate, configure +r_etcd_migrate_action: migrate diff --git a/roles/etcd_migrate/meta/main.yml b/roles/etcd_migrate/meta/main.yml new file mode 100644 index 000000000..f3cabbef6 --- /dev/null +++ b/roles/etcd_migrate/meta/main.yml @@ -0,0 +1,17 @@ +--- +galaxy_info: +  author: Jan Chaloupka +  description: Etcd migration +  company: Red Hat, Inc. 
+  license: Apache License, Version 2.0 +  min_ansible_version: 2.1 +  platforms: +  - name: EL +    versions: +    - 7 +  categories: +  - cloud +  - system +dependencies: +- { role: etcd_common } +- { role: lib_utils } diff --git a/roles/etcd_migrate/tasks/check.yml b/roles/etcd_migrate/tasks/check.yml new file mode 100644 index 000000000..2f07713bc --- /dev/null +++ b/roles/etcd_migrate/tasks/check.yml @@ -0,0 +1,55 @@ +--- +# Check the cluster is healthy +- include: check_cluster_health.yml + +# Check if the member has v3 data already +# Run the migration only if the data are v2 +- name: Check if there are any v3 data +  command: > +    etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:2379' get "" --from-key --keys-only -w json --limit 1 +  environment: +    ETCDCTL_API: 3 +  register: l_etcdctl_output + +- fail: +    msg: "Unable to get a number of v3 keys" +  when: l_etcdctl_output.rc != 0 + +- fail: +    msg: "The etcd has at least one v3 key" +  when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0" + + +# TODO(jchaloup): once the until loop can be used over include/block, +#                 remove the repetive code +# - until loop not supported over include statement (nor block) +#   https://github.com/ansible/ansible/issues/17098 +# - with_items not supported over block + +# Check the cluster status for the first time +- include: check_cluster_status.yml + +# Check the cluster status for the second time +- block: +  - debug: +      msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" +  - name: Wait a while before another check +    pause: +      seconds: 5 +    when: not l_etcd_cluster_status_ok | bool + +  - include: check_cluster_status.yml +    when: not l_etcd_cluster_status_ok | bool + + +# Check the cluster status for the third time +- block: +  - debug: +      msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" +  - name: Wait a while before another check +    pause: +      seconds: 5 +    when: not l_etcd_cluster_status_ok | bool + +  - include: check_cluster_status.yml +    when: not l_etcd_cluster_status_ok | bool diff --git a/roles/etcd_migrate/tasks/check_cluster_health.yml b/roles/etcd_migrate/tasks/check_cluster_health.yml new file mode 100644 index 000000000..1abd6a32f --- /dev/null +++ b/roles/etcd_migrate/tasks/check_cluster_health.yml @@ -0,0 +1,23 @@ +--- +- name: Check cluster health +  command: > +    etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://{{ etcd_peer }}:2379 cluster-health +  register: etcd_cluster_health +  changed_when: false +  failed_when: false + +- name: Assume a member is not healthy +  set_fact: +    etcd_member_healthy: false + +- name: Get member item health status +  set_fact: +    etcd_member_healthy: true +  with_items: "{{ etcd_cluster_health.stdout_lines }}" +  when: "(etcd_peer in item) and ('is healthy' in item)" + +- name: Check the etcd cluster health +  # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy? 
+  fail: +    msg: "Etcd member {{ etcd_peer }} is not healthy" +  when: not etcd_member_healthy diff --git a/roles/etcd_migrate/tasks/check_cluster_status.yml b/roles/etcd_migrate/tasks/check_cluster_status.yml new file mode 100644 index 000000000..90fe385c1 --- /dev/null +++ b/roles/etcd_migrate/tasks/check_cluster_status.yml @@ -0,0 +1,32 @@ +--- +# etcd_ip originates from etcd_common role +- name: Check cluster status +  command: > +    etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints 'https://{{ etcd_peer }}:2379' -w json endpoint status +  environment: +    ETCDCTL_API: 3 +  register: l_etcd_cluster_status + +- name: Retrieve raftIndex +  set_fact: +    etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}" + +- block: +  # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers +  - name: Group all raftIndices into a list +    set_fact: +      etcd_members_raft_indices: "{{ groups['oo_etcd_to_config'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" + +  - name: Check the minimum and the maximum of raftIndices is at most 1 +    set_fact: +      etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}" + +  - debug: +      msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" + +  when: inventory_hostname in groups.oo_etcd_to_config[0] + +# The cluster raft status is ok if the difference of the max and min raft index is at most 1 +- name: capture the status +  set_fact: +    l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_config[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd_migrate/tasks/configure.yml b/roles/etcd_migrate/tasks/configure.yml new file mode 100644 index 000000000..a305d5bf3 --- /dev/null +++ b/roles/etcd_migrate/tasks/configure.yml @@ -0,0 +1,13 @@ +--- +- name: Configure master to use etcd3 storage backend +  yedit: +    src: /etc/origin/master/master-config.yaml +    key: "{{ item.key }}" +    value: "{{ item.value }}" +  with_items: +    - key: kubernetesMasterConfig.apiServerArguments.storage-backend +      value: +        - etcd3 +    - key: kubernetesMasterConfig.apiServerArguments.storage-media-type +      value: +        - application/vnd.kubernetes.protobuf diff --git a/roles/etcd_migrate/tasks/main.yml b/roles/etcd_migrate/tasks/main.yml new file mode 100644 index 000000000..409b0b613 --- /dev/null +++ b/roles/etcd_migrate/tasks/main.yml @@ -0,0 +1,25 @@ +--- +- name: Fail if invalid r_etcd_migrate_action provided +  fail: +    msg: "etcd_migrate role can only be called with 'check' or 'migrate' or 'configure'" +  when: r_etcd_migrate_action not in ['check', 'migrate', 'configure'] + +- name: Include main action task file +  include: "{{ r_etcd_migrate_action }}.yml" + +# 2. migrate v2 datadir into v3: +#   ETCDCTL_API=3 ./etcdctl migrate  --data-dir=${data_dir} --no-ttl +#   backup the etcd datadir first +#   Provide a way for an operator to specify transformer + +# 3. re-configure OpenShift master at /etc/origin/master/master-config.yml +#   set storage-backend to “etcd3” +# 4. we could leave the master restart to current logic (there is already the code ready (single vs. 
HA master)) + +# Run +# etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://172.16.186.45:2379 cluster-health +# to check the cluster health (from the etcdctl.sh aliases file) + +# Another assumption: +# - in order to migrate all etcd v2 data into v3, we need to shut down the cluster (let's verify that on Wednesday meeting) +# - diff --git a/roles/etcd_migrate/tasks/migrate.yml b/roles/etcd_migrate/tasks/migrate.yml new file mode 100644 index 000000000..cb479b0cc --- /dev/null +++ b/roles/etcd_migrate/tasks/migrate.yml @@ -0,0 +1,53 @@ +--- +# Should this be run in a serial manner? +- set_fact: +    l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" + +- name: Disable etcd members +  service: +    name: "{{ l_etcd_service }}" +    state: stopped + +# Should we skip all TTL keys? https://bugzilla.redhat.com/show_bug.cgi?id=1389773 +- name: Migrate etcd data +  command: > +    etcdctl migrate --data-dir={{ etcd_data_dir }} +  environment: +    ETCDCTL_API: 3 +  register: l_etcdctl_migrate + +# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup +- name: Check the etcd v2 data are correctly migrated +  fail: +    msg: "Failed to migrate a member" +  when: "'finished transforming keys' not in l_etcdctl_migrate.stdout" + +# TODO(jchaloup): start the etcd on a different port so noone can access it +# Once the validation is done +- name: Enable etcd member +  service: +    name: "{{ l_etcd_service }}" +    state: started + +- name: Re-introduce leases (as a replacement for key TTLs) +  command: > +    oadm migrate etcd-ttl \ +    --cert {{ etcd_peer_cert_file }} \ +    --key {{ etcd_peer_key_file }} \ +    --cacert {{ etcd_peer_ca_file }} \ +    --etcd-address 'https://{{ etcd_peer }}:2379' \ +    --ttl-keys-prefix {{ item }} \ +    --lease-duration 1h +  environment: +    ETCDCTL_API: 3 +  with_items: +  - "/kubernetes.io/events" +  - "/kubernetes.io/masterleases" + +- set_fact: +    r_etcd_migrate_success: true + +- name: Enable etcd member +  service: +    name: "{{ l_etcd_service }}" +    state: started diff --git a/roles/lib_openshift/library/oc_obj.py b/roles/lib_openshift/library/oc_obj.py index 56af303cc..9b0c0e0e4 100644 --- a/roles/lib_openshift/library/oc_obj.py +++ b/roles/lib_openshift/library/oc_obj.py @@ -90,9 +90,9 @@ options:      required: false      default: str      aliases: [] -  all_namespace: +  all_namespaces:      description: -    - The namespace where the object lives. +    - Search in all namespaces for the object.      required: false      default: false      aliases: [] diff --git a/roles/lib_openshift/src/doc/obj b/roles/lib_openshift/src/doc/obj index 4ff912b2d..c6504ed01 100644 --- a/roles/lib_openshift/src/doc/obj +++ b/roles/lib_openshift/src/doc/obj @@ -39,9 +39,9 @@ options:      required: false      default: str      aliases: [] -  all_namespace: +  all_namespaces:      description: -    - The namespace where the object lives. +    - Search in all namespaces for the object.      
    required: false
    default: false
    aliases: []
diff --git a/roles/openshift_cfme/README.md b/roles/openshift_cfme/README.md
new file mode 100644
index 000000000..8283afed6
--- /dev/null
+++ b/roles/openshift_cfme/README.md
@@ -0,0 +1,404 @@
+# OpenShift-Ansible - CFME Role
+
+# PROOF OF CONCEPT - Alpha Version
+
+This role is based on the work in the upstream
+[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods)
+project. For additional literature on configuration specific to
+ManageIQ (optional post-installation tasks), visit the project's
+[upstream documentation page](http://manageiq.org/docs/get-started/basic-configuration).
+
+Please submit a
+[new issue](https://github.com/openshift/openshift-ansible/issues/new)
+if you run into bugs with this role or wish to request enhancements.
+
+# Important Notes
+
+This is an early *proof of concept* role to install the Cloud Forms
+Management Engine (ManageIQ) on OpenShift Container Platform (OCP).
+
+* This role is still in **ALPHA STATUS**
+* Many options are still hard-coded (ex: NFS setup)
+* Not many configurable options yet
+* **Should** be run on a dedicated cluster
+* **Will not run** on undersized infra
+* The terms *CFME* and *MIQ* / *ManageIQ* are interchangeable
+
+## Requirements
+
+**NOTE:** These requirements are copied from the upstream
+[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods)
+project.
+
+### Prerequisites:
+
+*
+  [OpenShift Origin 1.5](https://docs.openshift.com/container-platform/3.5/welcome/index.html)
+  or
+  [higher](https://docs.openshift.com/container-platform/latest/welcome/index.html)
+  provisioned
+* NFS or other compatible volume provider
+* A cluster-admin user (created by role if required)
+
+### Cluster Sizing
+
+In order to avoid random deployment failures due to resource
+starvation, we recommend a minimum cluster size for a **test**
+environment.
+
+| Type           | Size    | CPUs     | Memory   |
+|----------------|---------|----------|----------|
+| Masters        | `1+`    | `8`      | `12GB`   |
+| Nodes          | `2+`    | `4`      | `8GB`    |
+| PV Storage     | `25GB`  | `N/A`    | `N/A`    |
+
+
+**CFME has hard requirements for memory. CFME will NOT install if your
+  infrastructure does not meet or exceed the requirements given
+  above. Do not run this playbook if you do not have the required
+  memory; you will just waste your time.**
+
+
+### Other sizing considerations
+
+* Recommendations assume MIQ will be the **only application running**
+  on this cluster.
+* Alternatively, you can provision an infrastructure node to run
+  registry/metrics/router/logging pods.
+* Each MIQ application pod will consume at least `3GB` of RAM on initial
+  deployment (blank deployment without providers).
+* RAM consumption will ramp up depending on appliance use; once
+  providers are added, expect higher resource consumption.
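+
+A quick way to sanity check the sizing above is to compare each
+node's allocatable resources against the table. This is only an
+illustrative sketch; it assumes you are already logged in to the
+cluster with `oc` as a cluster admin:
+
+```
+# Illustrative only: print allocatable CPU and memory per node
+oc get nodes -o custom-columns=NAME:.metadata.name,CPU:.status.allocatable.cpu,MEMORY:.status.allocatable.memory
+```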
+
+
+### Assumptions
+
+1) You meet/exceed the [cluster sizing](#cluster-sizing) requirements
+1) Your NFS server is on your master host
+1) Your PV backing NFS storage volume is mounted on `/exports/`
+
+Required directories that NFS will export to back the PVs:
+
+* `/exports/miq-pv0[123]`
+
+If the required directories are not present at install-time, they will
+be created using the recommended permissions per the
+[upstream documentation](https://github.com/ManageIQ/manageiq-pods#make-persistent-volumes-to-host-the-miq-database-and-application-data):
+
+* UID/GID: `root`/`root`
+* Mode: `0775`
+
+**IMPORTANT:** If you are using a separate volume (`/dev/vdX`) for NFS
+  storage, **ensure** it is mounted on `/exports/` **before** running
+  this role.
+
+
+## Role Variables
+
+Core variables in this role:
+
+| Name                          | Default value | Description   |
+|-------------------------------|---------------|---------------|
+| `openshift_cfme_install_app`  | `False`       | `True`: Install everything and create a new CFME app, `False`: Just install all of the templates and scaffolding |
+
+
+Variables you may override have defaults defined in
+[defaults/main.yml](defaults/main.yml).
+
+
+# Important Notes
+
+This role is presently in **tech preview** status. Use it with the same
+caution you would give any other pre-release software.
+
+**Most importantly** follow this one rule: don't re-run the entrypoint
+playbook multiple times in a row without cleaning up after previous
+runs if some of the CFME steps have run. This is a known
+flake. Cleanup instructions are provided at the bottom of this README.
+
+
+# Usage
+
+This section describes the basic usage of this role. All parameters
+will use their [default values](defaults/main.yml).
+
+## Pre-flight Checks
+
+**IMPORTANT:** As documented above in [the prerequisites](#prerequisites),
+  you **must already** have your OCP cluster up and running.
+
+**Optional:** The ManageIQ pod is fairly large (about 1.7 GB), so to
+save some spin-up time post-deployment, you can begin pre-pulling the
+docker image to each of your nodes now:
+
+```
+root@node0x # docker pull docker.io/manageiq/manageiq-pods:app-latest-fine
+```
+
+## Getting Started
+
+1) The *entry point playbook* to install CFME is located in
+[the BYO playbooks](../../playbooks/byo/openshift-cfme/config.yml)
+directory.
+
+2) Update your existing `hosts` inventory file and ensure the
+parameter `openshift_cfme_install_app` is set to `True` under the
+`[OSEv3:vars]` block.
+
+3) Using your existing `hosts` inventory file, run `ansible-playbook`
+with the entry point playbook:
+
+```
+$ ansible-playbook -v -i <INVENTORY_FILE> playbooks/byo/openshift-cfme/config.yml
+```
+
+## Next Steps
+
+Once complete, the playbook will let you know:
+
+
+```
+TASK [openshift_cfme : Status update] *********************************************************
+ok: [ho.st.na.me] => {
+    "msg": "CFME has been deployed. Note that there will be a delay before it is fully initialized.\n"
+}
+```
+
+This will take several minutes (*possibly 10 or more*, depending on
+your network connection). However, you can get some insight into the
+deployment process during initialization.
+
+### oc describe pod manageiq-0
+
+*Some useful information about the output you will see if you run the
+`oc describe pod manageiq-0` command*
+
+**Readiness probes** - These will take a while to become
+`Healthy`.
The initial health probes won't even happen for at least 8 +minutes depending on how long it takes you to pull down the large +images. ManageIQ is a large application so it may take a considerable +amount of time for it to deploy and be marked as `Healthy`. + +If you go to the node you know the application is running on (check +for `Successfully assigned manageiq-0 to <HOST|IP>` in the `describe` +output) you can run a `docker pull` command to monitor the progress of +the image pull: + +``` +[root@cfme-node ~]# docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +Trying to pull repository docker.io/manageiq/manageiq-pods ... +sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a: Pulling from docker.io/manageiq/manageiq-pods +Digest: sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a +Status: Image is up to date for docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +The example above demonstrates the case where the image has been +successfully pulled already. + +If the image isn't completely pulled already then you will see +multiple progress bars detailing each image layer download status. + + +### rsh + +*Useful inspection/progress monitoring techniques with the `oc rsh` +command.* + + +On your master node, switch to the `cfme` project (or whatever you +named it if you overrode the `openshift_cfme_project` variable) and +check on the pod states: + +``` +[root@cfme-master01 ~]# oc project cfme +Now using project "cfme" on server "https://10.10.0.100:8443". + +[root@cfme-master01 ~]# oc get pod +NAME                 READY     STATUS    RESTARTS   AGE +manageiq-0           0/1       Running   0          14m +memcached-1-3lk7g    1/1       Running   0          14m +postgresql-1-12slb   1/1       Running   0          14m +``` + +Note how the `manageiq-0` pod says `0/1` under the **READY** +column. After some time (depending on your network connection) you'll +be able to `rsh` into the pod to find out more of what's happening in +real time. First, the easy-mode command, run this once `rsh` is +available and then watch until it says `Started Initialize Appliance +Database`: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 journalctl -f -u appliance-initialize.service +``` + +For the full explanation of what this means, and more interactive +inspection techniques, keep reading on. + +To obtain a shell on our `manageiq` pod we use this command: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 bash -l +``` + +The `rsh` command opens a shell in your pod for you. In this case it's +the pod called `manageiq-0`. `systemd` is managing the services in +this pod so we can use the `list-units` command to see what is running +currently: `# systemctl list-units | grep appliance`. + +If you see the `appliance-initialize` service running, this indicates +that basic setup is still in progress. 
We can monitor the process with +the `journalctl` command like so: + + +``` +[root@manageiq-0 vmdb]# journalctl -f -u appliance-initialize.service +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking deployment status == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: No pre-existing EVM configuration found on region PV +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking for existing data on server PV == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Starting New Deployment == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Applying memcached config == +Jun 14 14:55:53 manageiq-0 appliance-initialize.sh[58]: == Initializing Appliance == +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: create encryption key +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: configuring external database +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: Checking for connections to the database... +Jun 14 14:56:09 manageiq-0 appliance-initialize.sh[58]: Create region starting +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: Create region complete +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data backup == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sending incremental file list +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: created directory /persistent/server-deploy/backup/backup_2017_06_14_145816 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/REGION +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/v2_key +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/database.yml +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/GUID +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sent 1330 bytes  received 136 bytes  2932.00 bytes/sec +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: total size is 770  speedup is 0.53 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Restoring PV data symlinks == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/REGION symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/config/database.yml symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/certs/v2_key symlink is already in 
place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/log symlink is already in place, skipping +Jun 14 14:58:28 manageiq-0 systemctl[304]: Removed symlink /etc/systemd/system/multi-user.target.wants/appliance-initialize.service. +Jun 14 14:58:29 manageiq-0 systemd[1]: Started Initialize Appliance Database. +``` + +Most of what we see here (above) is the initial database seeding +process. This process isn't very quick, so be patient. + +At the bottom of the log there is a special line from the `systemctl` +service, `Removed symlink +/etc/systemd/system/multi-user.target.wants/appliance-initialize.service`. The +`appliance-initialize` service is no longer marked as enabled. This +indicates that the base application initialization is complete now. + +We're not done yet though, there are other ancillary services which +run in this pod to support the application. *Still in the rsh shell*, +Use the `ps` command to monitor for the `httpd` processes +starting. You will see output similar to the following when that stage +has completed: + +``` +[root@manageiq-0 vmdb]# ps aux | grep http +root       1941  0.0  0.1 249820  7640 ?        Ss   15:02   0:00 /usr/sbin/httpd -DFOREGROUND +apache     1942  0.0  0.0 250752  6012 ?        S    15:02   0:00 /usr/sbin/httpd -DFOREGROUND +apache     1943  0.0  0.0 250472  5952 ?        S    15:02   0:00 /usr/sbin/httpd -DFOREGROUND +apache     1944  0.0  0.0 250472  5916 ?        S    15:02   0:00 /usr/sbin/httpd -DFOREGROUND +apache     1945  0.0  0.0 250360  5764 ?        S    15:02   0:00 /usr/sbin/httpd -DFOREGROUND +``` + +Furthermore, you can find other related processes by just looking for +ones with `MIQ` in their name: + +``` +[root@manageiq-0 vmdb]# ps aux | grep miq +root        333 27.7  4.2 555884 315916 ?       Sl   14:58   3:59 MIQ Server +root       1976  0.6  4.0 507224 303740 ?       SNl  15:02   0:03 MIQ: MiqGenericWorker id: 1, queue: generic +root       1984  0.6  4.0 507224 304312 ?       SNl  15:02   0:03 MIQ: MiqGenericWorker id: 2, queue: generic +root       1992  0.9  4.0 508252 304888 ?       SNl  15:02   0:05 MIQ: MiqPriorityWorker id: 3, queue: generic +root       2000  0.7  4.0 510308 304696 ?       SNl  15:02   0:04 MIQ: MiqPriorityWorker id: 4, queue: generic +root       2008  1.2  4.0 514000 303612 ?       SNl  15:02   0:07 MIQ: MiqScheduleWorker id: 5 +root       2026  0.2  4.0 517504 303644 ?       SNl  15:02   0:01 MIQ: MiqEventHandler id: 6, queue: ems +root       2036  0.2  4.0 518532 303768 ?       SNl  15:02   0:01 MIQ: MiqReportingWorker id: 7, queue: reporting +root       2044  0.2  4.0 519560 303812 ?       SNl  15:02   0:01 MIQ: MiqReportingWorker id: 8, queue: reporting +root       2059  0.2  4.0 528372 303956 ?       SNl  15:02   0:01 puma 3.3.0 (tcp://127.0.0.1:5000) [MIQ: Web Server Worker] +root       2067  0.9  4.0 529664 305716 ?       SNl  15:02   0:05 puma 3.3.0 (tcp://127.0.0.1:3000) [MIQ: Web Server Worker] +root       2075  0.2  4.0 529408 304056 ?       SNl  15:02   0:01 puma 3.3.0 (tcp://127.0.0.1:4000) [MIQ: Web Server Worker] +root       2329  0.0  0.0  10640   972 ?        S+   15:13   0:00 grep --color=auto -i miq +``` + +Finally, *still in the rsh shell*, to test if the application is +running correctly, we can request the application homepage. 
If the
+page is available, the page title will be `ManageIQ: Login`:
+
+```
+[root@manageiq-0 vmdb]# curl -s -k https://localhost | grep -A2 '<title>'
+<title>
+ManageIQ: Login
+</title>
+```
+
+**Note:** The `-s` flag makes `curl` operations silent and the `-k`
+flag tells it to ignore errors about untrusted certificates.
+
+
+# Additional Upstream Resources
+
+Below are some useful resources from the upstream project
+documentation. You may find these of value.
+
+* [Verify Setup Was Successful](https://github.com/ManageIQ/manageiq-pods#verifying-the-setup-was-successful)
+* [POD Access And Routes](https://github.com/ManageIQ/manageiq-pods#pod-access-and-routes)
+* [Troubleshooting](https://github.com/ManageIQ/manageiq-pods#troubleshooting)
+
+
+# Manual Cleanup
+
+At this time uninstallation/cleanup is still a manual process. You
+will have to follow a few steps to fully remove CFME from your
+cluster.
+
+Delete the project:
+
+* `oc delete project cfme`
+
+Delete the PVs:
+
+* `oc delete pv miq-pv01`
+* `oc delete pv miq-pv02`
+* `oc delete pv miq-pv03`
+
+Clean out the old PV data:
+
+* `cd /exports/`
+* `find miq* -type f -delete`
+* `find miq* -type d -delete`
+
+Remove the NFS exports:
+
+* `rm /etc/exports.d/openshift_cfme.exports`
+* `exportfs -ar`
+
+Delete the user:
+
+* `oc delete user cfme`
+
+**NOTE:** The `oc delete project cfme` command will return quickly;
+however, it will continue to operate in the background. Continue
+running `oc get project` after you've completed the other steps to
+monitor the pods and final project termination progress.
diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml
new file mode 100644
index 000000000..493e1ef68
--- /dev/null
+++ b/roles/openshift_cfme/defaults/main.yml
@@ -0,0 +1,38 @@
+---
+# Namespace for the CFME project
+openshift_cfme_project: cfme
+# Namespace/project description
+openshift_cfme_project_description: ManageIQ - CloudForms Management Engine
+# Basic user assigned the `admin` role for the project
+openshift_cfme_user: cfme
+# Project system account for enabling privileged pods
+openshift_cfme_service_account: "system:serviceaccount:{{ openshift_cfme_project }}:default"
+# All the required exports
+openshift_cfme_pv_exports:
+  - miq-pv01
+  - miq-pv02
+  - miq-pv03
+# PV template files and their created object names
+openshift_cfme_pv_data:
+  - pv_name: miq-pv01
+    pv_template: miq-pv-db.yaml
+    pv_label: CFME DB PV
+  - pv_name: miq-pv02
+    pv_template: miq-pv-region.yaml
+    pv_label: CFME Region PV
+  - pv_name: miq-pv03
+    pv_template: miq-pv-server.yaml
+    pv_label: CFME Server PV
+
+# Tuning parameter to use more than 5 images at once from an ImageStream
+openshift_cfme_maxImagesBulkImportedPerRepository: 100
+# Hostname/IP of the NFS server. Currently defaults to first master
+openshift_cfme_nfs_server: "{{ groups.nfs.0 }}"
+# TODO: Refactor '_install_app' variable. This is just for testing but
+# maybe in the future it should control the entire yes/no for CFME.
+#
+# Whether or not the manageiq app should be initialized ('oc new-app
+# --template=manageiq). If False everything UP TO 'new-app' is run.
+openshift_cfme_install_app: False +# Docker image to pull +openshift_cfme_container_image: "docker.io/manageiq/manageiq-pods:app-latest-fine" diff --git a/roles/openshift_cfme/files/miq-template.yaml b/roles/openshift_cfme/files/miq-template.yaml new file mode 100644 index 000000000..8f0d2af38 --- /dev/null +++ b/roles/openshift_cfme/files/miq-template.yaml @@ -0,0 +1,566 @@ +--- +path: /tmp/miq-template-out +data: +  apiVersion: v1 +  kind: Template +  labels: +    template: manageiq +  metadata: +    name: manageiq +    annotations: +      description: "ManageIQ appliance with persistent storage" +      tags: "instant-app,manageiq,miq" +      iconClass: "icon-rails" +  objects: +  - apiVersion: v1 +    kind: Secret +    metadata: +      name: "${NAME}-secrets" +    stringData: +      pg-password: "${DATABASE_PASSWORD}" +  - apiVersion: v1 +    kind: Service +    metadata: +      annotations: +        description: "Exposes and load balances ManageIQ pods" +        service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' +      name: ${NAME} +    spec: +      clusterIP: None +      ports: +      - name: http +        port: 80 +        protocol: TCP +        targetPort: 80 +      - name: https +        port: 443 +        protocol: TCP +        targetPort: 443 +      selector: +        name: ${NAME} +  - apiVersion: v1 +    kind: Route +    metadata: +      name: ${NAME} +    spec: +      host: ${APPLICATION_DOMAIN} +      port: +        targetPort: https +      tls: +        termination: passthrough +      to: +        kind: Service +        name: ${NAME} +  - apiVersion: v1 +    kind: ImageStream +    metadata: +      name: miq-app +      annotations: +        description: "Keeps track of the ManageIQ image changes" +    spec: +      dockerImageRepository: "${APPLICATION_IMG_NAME}" +  - apiVersion: v1 +    kind: ImageStream +    metadata: +      name: miq-postgresql +      annotations: +        description: "Keeps track of the PostgreSQL image changes" +    spec: +      dockerImageRepository: "${POSTGRESQL_IMG_NAME}" +  - apiVersion: v1 +    kind: ImageStream +    metadata: +      name: miq-memcached +      annotations: +        description: "Keeps track of the Memcached image changes" +    spec: +      dockerImageRepository: "${MEMCACHED_IMG_NAME}" +  - apiVersion: v1 +    kind: PersistentVolumeClaim +    metadata: +      name: "${NAME}-${DATABASE_SERVICE_NAME}" +    spec: +      accessModes: +        - ReadWriteOnce +      resources: +        requests: +          storage: ${DATABASE_VOLUME_CAPACITY} +  - apiVersion: v1 +    kind: PersistentVolumeClaim +    metadata: +      name: "${NAME}-region" +    spec: +      accessModes: +        - ReadWriteOnce +      resources: +        requests: +          storage: ${APPLICATION_REGION_VOLUME_CAPACITY} +  - apiVersion: apps/v1beta1 +    kind: "StatefulSet" +    metadata: +      name: ${NAME} +      annotations: +        description: "Defines how to deploy the ManageIQ appliance" +    spec: +      serviceName: "${NAME}" +      replicas: "${APPLICATION_REPLICA_COUNT}" +      template: +        metadata: +          labels: +            name: ${NAME} +          name: ${NAME} +        spec: +          containers: +          - name: manageiq +            image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" +            livenessProbe: +              tcpSocket: +                port: 443 +              initialDelaySeconds: 480 
+              timeoutSeconds: 3 +            readinessProbe: +              httpGet: +                path: / +                port: 443 +                scheme: HTTPS +              initialDelaySeconds: 200 +              timeoutSeconds: 3 +            ports: +            - containerPort: 80 +              protocol: TCP +            - containerPort: 443 +              protocol: TCP +            securityContext: +              privileged: true +            volumeMounts: +                - +                  name: "${NAME}-server" +                  mountPath: "/persistent" +                - +                  name: "${NAME}-region" +                  mountPath: "/persistent-region" +            env: +              - +                name: "APPLICATION_INIT_DELAY" +                value: "${APPLICATION_INIT_DELAY}" +              - +                name: "DATABASE_SERVICE_NAME" +                value: "${DATABASE_SERVICE_NAME}" +              - +                name: "DATABASE_REGION" +                value: "${DATABASE_REGION}" +              - +                name: "MEMCACHED_SERVICE_NAME" +                value: "${MEMCACHED_SERVICE_NAME}" +              - +                name: "POSTGRESQL_USER" +                value: "${DATABASE_USER}" +              - +                name: "POSTGRESQL_PASSWORD" +                valueFrom: +                  secretKeyRef: +                    name: "${NAME}-secrets" +                    key: "pg-password" +              - +                name: "POSTGRESQL_DATABASE" +                value: "${DATABASE_NAME}" +              - +                name: "POSTGRESQL_MAX_CONNECTIONS" +                value: "${POSTGRESQL_MAX_CONNECTIONS}" +              - +                name: "POSTGRESQL_SHARED_BUFFERS" +                value: "${POSTGRESQL_SHARED_BUFFERS}" +            resources: +              requests: +                memory: "${APPLICATION_MEM_REQ}" +                cpu: "${APPLICATION_CPU_REQ}" +              limits: +                memory: "${APPLICATION_MEM_LIMIT}" +            lifecycle: +              preStop: +                exec: +                  command: +                    - /opt/manageiq/container-scripts/sync-pv-data +          volumes: +           - +             name: "${NAME}-region" +             persistentVolumeClaim: +               claimName: ${NAME}-region +      volumeClaimTemplates: +        - metadata: +            name: "${NAME}-server" +            annotations: +              # Uncomment this if using dynamic volume provisioning. 
+              # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html +              # volume.alpha.kubernetes.io/storage-class: anything +          spec: +            accessModes: [ ReadWriteOnce ] +            resources: +              requests: +                storage: "${APPLICATION_VOLUME_CAPACITY}" +  - apiVersion: v1 +    kind: "Service" +    metadata: +      name: "${MEMCACHED_SERVICE_NAME}" +      annotations: +        description: "Exposes the memcached server" +    spec: +      ports: +        - +          name: "memcached" +          port: 11211 +          targetPort: 11211 +      selector: +        name: "${MEMCACHED_SERVICE_NAME}" +  - apiVersion: v1 +    kind: "DeploymentConfig" +    metadata: +      name: "${MEMCACHED_SERVICE_NAME}" +      annotations: +        description: "Defines how to deploy memcached" +    spec: +      strategy: +        type: "Recreate" +      triggers: +        - +          type: "ImageChange" +          imageChangeParams: +            automatic: true +            containerNames: +              - "memcached" +            from: +              kind: "ImageStreamTag" +              name: "miq-memcached:${MEMCACHED_IMG_TAG}" +        - +          type: "ConfigChange" +      replicas: 1 +      selector: +        name: "${MEMCACHED_SERVICE_NAME}" +      template: +        metadata: +          name: "${MEMCACHED_SERVICE_NAME}" +          labels: +            name: "${MEMCACHED_SERVICE_NAME}" +        spec: +          volumes: [] +          containers: +            - +              name: "memcached" +              image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" +              ports: +                - +                  containerPort: 11211 +              readinessProbe: +                timeoutSeconds: 1 +                initialDelaySeconds: 5 +                tcpSocket: +                  port: 11211 +              livenessProbe: +                timeoutSeconds: 1 +                initialDelaySeconds: 30 +                tcpSocket: +                  port: 11211 +              volumeMounts: [] +              env: +                - +                  name: "MEMCACHED_MAX_MEMORY" +                  value: "${MEMCACHED_MAX_MEMORY}" +                - +                  name: "MEMCACHED_MAX_CONNECTIONS" +                  value: "${MEMCACHED_MAX_CONNECTIONS}" +                - +                  name: "MEMCACHED_SLAB_PAGE_SIZE" +                  value: "${MEMCACHED_SLAB_PAGE_SIZE}" +              resources: +                requests: +                  memory: "${MEMCACHED_MEM_REQ}" +                  cpu: "${MEMCACHED_CPU_REQ}" +                limits: +                  memory: "${MEMCACHED_MEM_LIMIT}" +  - apiVersion: v1 +    kind: "Service" +    metadata: +      name: "${DATABASE_SERVICE_NAME}" +      annotations: +        description: "Exposes the database server" +    spec: +      ports: +        - +          name: "postgresql" +          port: 5432 +          targetPort: 5432 +      selector: +        name: "${DATABASE_SERVICE_NAME}" +  - apiVersion: v1 +    kind: "DeploymentConfig" +    metadata: +      name: "${DATABASE_SERVICE_NAME}" +      annotations: +        description: "Defines how to deploy the database" +    spec: +      strategy: +        type: "Recreate" +      triggers: +        - +          type: "ImageChange" +          imageChangeParams: +            automatic: true +            containerNames: +              - "postgresql" +            from: +              kind: "ImageStreamTag" 
+              name: "miq-postgresql:${POSTGRESQL_IMG_TAG}" +        - +          type: "ConfigChange" +      replicas: 1 +      selector: +        name: "${DATABASE_SERVICE_NAME}" +      template: +        metadata: +          name: "${DATABASE_SERVICE_NAME}" +          labels: +            name: "${DATABASE_SERVICE_NAME}" +        spec: +          volumes: +            - +              name: "miq-pgdb-volume" +              persistentVolumeClaim: +                claimName: "${NAME}-${DATABASE_SERVICE_NAME}" +          containers: +            - +              name: "postgresql" +              image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" +              ports: +                - +                  containerPort: 5432 +              readinessProbe: +                timeoutSeconds: 1 +                initialDelaySeconds: 15 +                exec: +                  command: +                    - "/bin/sh" +                    - "-i" +                    - "-c" +                    - "psql -h 127.0.0.1 -U ${POSTGRESQL_USER} -q -d ${POSTGRESQL_DATABASE} -c 'SELECT 1'" +              livenessProbe: +                timeoutSeconds: 1 +                initialDelaySeconds: 60 +                tcpSocket: +                  port: 5432 +              volumeMounts: +                - +                  name: "miq-pgdb-volume" +                  mountPath: "/var/lib/pgsql/data" +              env: +                - +                  name: "POSTGRESQL_USER" +                  value: "${DATABASE_USER}" +                - +                  name: "POSTGRESQL_PASSWORD" +                  valueFrom: +                    secretKeyRef: +                      name: "${NAME}-secrets" +                      key: "pg-password" +                - +                  name: "POSTGRESQL_DATABASE" +                  value: "${DATABASE_NAME}" +                - +                  name: "POSTGRESQL_MAX_CONNECTIONS" +                  value: "${POSTGRESQL_MAX_CONNECTIONS}" +                - +                  name: "POSTGRESQL_SHARED_BUFFERS" +                  value: "${POSTGRESQL_SHARED_BUFFERS}" +              resources: +                requests: +                  memory: "${POSTGRESQL_MEM_REQ}" +                  cpu: "${POSTGRESQL_CPU_REQ}" +                limits: +                  memory: "${POSTGRESQL_MEM_LIMIT}" + +  parameters: +    - +      name: "NAME" +      displayName: Name +      required: true +      description: "The name assigned to all of the frontend objects defined in this template." +      value: manageiq +    - +      name: "DATABASE_SERVICE_NAME" +      displayName: "PostgreSQL Service Name" +      required: true +      description: "The name of the OpenShift Service exposed for the PostgreSQL container." +      value: "postgresql" +    - +      name: "DATABASE_USER" +      displayName: "PostgreSQL User" +      required: true +      description: "PostgreSQL user that will access the database." +      value: "root" +    - +      name: "DATABASE_PASSWORD" +      displayName: "PostgreSQL Password" +      required: true +      description: "Password for the PostgreSQL user." +      from: "[a-zA-Z0-9]{8}" +      generate: expression +    - +      name: "DATABASE_NAME" +      required: true +      displayName: "PostgreSQL Database Name" +      description: "Name of the PostgreSQL database accessed." 
+      value: "vmdb_production" +    - +      name: "DATABASE_REGION" +      required: true +      displayName: "Application Database Region" +      description: "Database region that will be used for application." +      value: "0" +    - +      name: "MEMCACHED_SERVICE_NAME" +      required: true +      displayName: "Memcached Service Name" +      description: "The name of the OpenShift Service exposed for the Memcached container." +      value: "memcached" +    - +      name: "MEMCACHED_MAX_MEMORY" +      displayName: "Memcached Max Memory" +      description: "Memcached maximum memory for memcached object storage in MB." +      value: "64" +    - +      name: "MEMCACHED_MAX_CONNECTIONS" +      displayName: "Memcached Max Connections" +      description: "Memcached maximum number of connections allowed." +      value: "1024" +    - +      name: "MEMCACHED_SLAB_PAGE_SIZE" +      displayName: "Memcached Slab Page Size" +      description: "Memcached size of each slab page." +      value: "1m" +    - +      name: "POSTGRESQL_MAX_CONNECTIONS" +      displayName: "PostgreSQL Max Connections" +      description: "PostgreSQL maximum number of database connections allowed." +      value: "100" +    - +      name: "POSTGRESQL_SHARED_BUFFERS" +      displayName: "PostgreSQL Shared Buffer Amount" +      description: "Amount of memory dedicated for PostgreSQL shared memory buffers." +      value: "256MB" +    - +      name: "APPLICATION_CPU_REQ" +      displayName: "Application Min CPU Requested" +      required: true +      description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." +      value: "1000m" +    - +      name: "POSTGRESQL_CPU_REQ" +      displayName: "PostgreSQL Min CPU Requested" +      required: true +      description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." +      value: "500m" +    - +      name: "MEMCACHED_CPU_REQ" +      displayName: "Memcached Min CPU Requested" +      required: true +      description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." +      value: "200m" +    - +      name: "APPLICATION_MEM_REQ" +      displayName: "Application Min RAM Requested" +      required: true +      description: "Minimum amount of memory the Application container will need." +      value: "6144Mi" +    - +      name: "POSTGRESQL_MEM_REQ" +      displayName: "PostgreSQL Min RAM Requested" +      required: true +      description: "Minimum amount of memory the PostgreSQL container will need." +      value: "1024Mi" +    - +      name: "MEMCACHED_MEM_REQ" +      displayName: "Memcached Min RAM Requested" +      required: true +      description: "Minimum amount of memory the Memcached container will need." +      value: "64Mi" +    - +      name: "APPLICATION_MEM_LIMIT" +      displayName: "Application Max RAM Limit" +      required: true +      description: "Maximum amount of memory the Application container can consume." +      value: "16384Mi" +    - +      name: "POSTGRESQL_MEM_LIMIT" +      displayName: "PostgreSQL Max RAM Limit" +      required: true +      description: "Maximum amount of memory the PostgreSQL container can consume." +      value: "8192Mi" +    - +      name: "MEMCACHED_MEM_LIMIT" +      displayName: "Memcached Max RAM Limit" +      required: true +      description: "Maximum amount of memory the Memcached container can consume." 
+      value: "256Mi" +    - +      name: "POSTGRESQL_IMG_NAME" +      displayName: "PostgreSQL Image Name" +      description: "This is the PostgreSQL image name requested to deploy." +      value: "docker.io/manageiq/manageiq-pods" +    - +      name: "POSTGRESQL_IMG_TAG" +      displayName: "PostgreSQL Image Tag" +      description: "This is the PostgreSQL image tag/version requested to deploy." +      value: "postgresql-latest-fine" +    - +      name: "MEMCACHED_IMG_NAME" +      displayName: "Memcached Image Name" +      description: "This is the Memcached image name requested to deploy." +      value: "docker.io/manageiq/manageiq-pods" +    - +      name: "MEMCACHED_IMG_TAG" +      displayName: "Memcached Image Tag" +      description: "This is the Memcached image tag/version requested to deploy." +      value: "memcached-latest-fine" +    - +      name: "APPLICATION_IMG_NAME" +      displayName: "Application Image Name" +      description: "This is the Application image name requested to deploy." +      value: "docker.io/manageiq/manageiq-pods" +    - +      name: "APPLICATION_IMG_TAG" +      displayName: "Application Image Tag" +      description: "This is the Application image tag/version requested to deploy." +      value: "app-latest-fine" +    - +      name: "APPLICATION_DOMAIN" +      displayName: "Application Hostname" +      description: "The exposed hostname that will route to the application service, if left blank a value will be defaulted." +      value: "" +    - +      name: "APPLICATION_REPLICA_COUNT" +      displayName: "Application Replica Count" +      description: "This is the number of Application replicas requested to deploy." +      value: "1" +    - +      name: "APPLICATION_INIT_DELAY" +      displayName: "Application Init Delay" +      required: true +      description: "Delay in seconds before we attempt to initialize the application." +      value: "15" +    - +      name: "APPLICATION_VOLUME_CAPACITY" +      displayName: "Application Volume Capacity" +      required: true +      description: "Volume space available for application data." +      value: "5Gi" +    - +      name: "APPLICATION_REGION_VOLUME_CAPACITY" +      displayName: "Application Region Volume Capacity" +      required: true +      description: "Volume space available for region application data." +      value: "5Gi" +    - +      name: "DATABASE_VOLUME_CAPACITY" +      displayName: "Database Volume Capacity" +      required: true +      description: "Volume space available for database." 
+      value: "15Gi" diff --git a/roles/openshift_cfme/files/openshift_cfme.exports b/roles/openshift_cfme/files/openshift_cfme.exports new file mode 100644 index 000000000..5457d41fc --- /dev/null +++ b/roles/openshift_cfme/files/openshift_cfme.exports @@ -0,0 +1,3 @@ +/exports/miq-pv01 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv02 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv03 *(rw,no_root_squash,no_wdelay) diff --git a/roles/openshift_cfme/handlers/main.yml b/roles/openshift_cfme/handlers/main.yml new file mode 100644 index 000000000..476a5e030 --- /dev/null +++ b/roles/openshift_cfme/handlers/main.yml @@ -0,0 +1,42 @@ +--- +###################################################################### +# NOTE: These are duplicated from roles/openshift_master/handlers/main.yml +# +# TODO: Use the consolidated 'openshift_handlers' role once it's ready +# See: https://github.com/openshift/openshift-ansible/pull/4041#discussion_r118770782 +###################################################################### + +- name: restart master +  systemd: name={{ openshift.common.service_type }}-master state=restarted +  when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) +  notify: Verify API Server + +- name: restart master api +  systemd: name={{ openshift.common.service_type }}-master-api state=restarted +  when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' +  notify: Verify API Server + +- name: restart master controllers +  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted +  when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + +- name: Verify API Server +  # Using curl here since the uri module requires python-httplib2 and +  # wait_for port doesn't provide health information. +  command: > +    curl --silent --tlsv1.2 +    {% if openshift.common.version_gte_3_2_or_1_2 | bool %} +    --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt +    {% else %} +    --cacert {{ openshift.common.config_base }}/master/ca.crt +    {% endif %} +    {{ openshift.master.api_url }}/healthz/ready +  args: +    # Disables the following warning: +    # Consider using get_url or uri module rather than running curl +    warn: no +  register: api_available_output +  until: api_available_output.stdout == 'ok' +  retries: 120 +  delay: 1 +  changed_when: false diff --git a/roles/openshift_cfme/img/CFMEBasicDeployment.png b/roles/openshift_cfme/img/CFMEBasicDeployment.png Binary files differnew file mode 100644 index 000000000..a89c1e325 --- /dev/null +++ b/roles/openshift_cfme/img/CFMEBasicDeployment.png diff --git a/roles/openshift_cfme/meta/main.yml b/roles/openshift_cfme/meta/main.yml new file mode 100644 index 000000000..9200f2c3c --- /dev/null +++ b/roles/openshift_cfme/meta/main.yml @@ -0,0 +1,20 @@ +--- +galaxy_info: +  author: Tim Bielawa +  description: OpenShift CFME (Manage IQ) Deployer +  company: Red Hat, Inc. 
+  license: Apache License, Version 2.0 +  min_ansible_version: 2.2 +  version: 1.0 +  platforms: +  - name: EL +    versions: +    - 7 +  categories: +  - cloud +  - system +dependencies: +- role: lib_openshift +- role: lib_utils +- role: openshift_common +- role: openshift_master_facts diff --git a/roles/openshift_cfme/tasks/create_pvs.yml b/roles/openshift_cfme/tasks/create_pvs.yml new file mode 100644 index 000000000..7fa7d3997 --- /dev/null +++ b/roles/openshift_cfme/tasks/create_pvs.yml @@ -0,0 +1,36 @@ +--- +# Check for existance and then conditionally: +# - evaluate templates +# - PVs +# +# These tasks idempotently create required CFME PV objects. Do not +# call this file directly. This file is intended to be ran as an +# include that has a 'with_items' attached to it. Hence the use below +# of variables like "{{ item.pv_label }}" + +- name: "Check if the {{ item.pv_label }} template has been created already" +  oc_obj: +    namespace: "{{ openshift_cfme_project }}" +    state: list +    kind: pv +    name: "{{ item.pv_name }}" +  register: miq_pv_check + +# Skip all of this if the PV already exists +- block: +    - name: "Ensure the {{ item.pv_label }} template is evaluated" +      template: +        src: "{{ item.pv_template }}.j2" +        dest: "{{ template_dir }}/{{ item.pv_template }}" + +    - name: "Ensure {{ item.pv_label }} is created" +      oc_obj: +        namespace: "{{ openshift_cfme_project }}" +        kind: pv +        name: "{{ item.pv_name }}" +        state: present +        delete_after: True +        files: +          - "{{ template_dir }}/{{ item.pv_template }}" +  when: +    - not miq_pv_check.results.results.0 diff --git a/roles/openshift_cfme/tasks/main.yml b/roles/openshift_cfme/tasks/main.yml new file mode 100644 index 000000000..acbce7232 --- /dev/null +++ b/roles/openshift_cfme/tasks/main.yml @@ -0,0 +1,148 @@ +--- +###################################################################### +# Users, projects, and privileges + +- name: Ensure the CFME user exists +  oc_user: +    state: present +    username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace exists with CFME user as admin +  oc_project: +    state: present +    name: "{{ openshift_cfme_project }}" +    display_name: "{{ openshift_cfme_project_description }}" +    admin: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace service account is privileged +  oc_adm_policy_user: +    namespace: "{{ openshift_cfme_project }}" +    user: "{{ openshift_cfme_service_account }}" +    resource_kind: scc +    resource_name: privileged +    state: present + +###################################################################### +# NFS + +- name: Ensure the /exports/ directory exists +  file: +    path: /exports/ +    state: directory +    mode: 0755 +    owner: root +    group: root + +- name: Ensure the miq-pv0X export directories exist +  file: +    path: "/exports/{{ item }}" +    state: directory +    mode: 0775 +    owner: root +    group: root +  with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the NFS exports for CFME PVs exist +  copy: +    src: openshift_cfme.exports +    dest: /etc/exports.d/openshift_cfme.exports +  register: nfs_exports_updated + +- name: Ensure the NFS export table is refreshed if exports were added +  command: exportfs -ar +  when: +    - nfs_exports_updated.changed + + +###################################################################### +# Create the required CFME PVs. 
Check out these online docs if you +# need a refresher on includes looping with items: +# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0 +# * http://stackoverflow.com/a/35128533 +# +# TODO: Handle the case where a PV template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- include: create_pvs.yml +  with_items: "{{ openshift_cfme_pv_data }}" + +###################################################################### +# CFME App Template +# +# Note, this is different from the create_pvs.yml tasks in that the +# application template does not require any jinja2 evaluation. +# +# TODO: Handle the case where the server template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- name: Check if the CFME Server template has been created already +  oc_obj: +    namespace: "{{ openshift_cfme_project }}" +    state: list +    kind: template +    name: manageiq +  register: miq_server_check + +- name: Copy over CFME Server template +  copy: +    src: miq-template.yaml +    dest: "{{ template_dir }}/miq-template.yaml" + +- name: Ensure the server template was read from disk +  debug: +    var=r_openshift_cfme_miq_template_content + +- name: Ensure CFME Server Template exists +  oc_obj: +    namespace: "{{ openshift_cfme_project }}" +    kind: template +    name: "manageiq" +    state: present +    content: "{{ r_openshift_cfme_miq_template_content }}" + +###################################################################### +# Let's do this + +- name: Ensure the CFME Server is created +  oc_process: +    namespace: "{{ openshift_cfme_project }}" +    template_name: manageiq +    create: True +  register: cfme_new_app_process +  run_once: True +  when: +    # User said to install CFME in their inventory +    - openshift_cfme_install_app | bool +    # # The server app doesn't exist already +    # - not miq_server_check.results.results.0 + +- debug: +    var: cfme_new_app_process + +###################################################################### +# Various cleanup steps + +# TODO: Not sure what to do about this right now. Might be able to +# just delete it?  This currently warns about "Unable to find +# '<TEMP_DIR>' in expected paths." +- name: Ensure the temporary PV/App templates are erased +  file: +    path: "{{ item }}" +    state: absent +  with_fileglob: +    - "{{ template_dir }}/*.yaml" + +- name: Ensure the temporary PV/app template directory is erased +  file: +    path: "{{ template_dir }}" +    state: absent + +###################################################################### + +- name: Status update +  debug: +    msg: > +      CFME has been deployed. Note that there will be a delay before +      it is fully initialized. 
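
For orientation, the `oc_process` task in `tasks/main.yml` above is
roughly what you would get by processing the template by hand. The
following is only an illustrative sketch (it assumes the default
`cfme` project and the `manageiq` template created by the preceding
tasks, and a logged-in cluster admin); the role itself performs this
step through the `oc_process` module shown above:

```
# Illustrative manual equivalent of the oc_process task above
oc process manageiq -n cfme | oc create -n cfme -f -
```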
diff --git a/roles/openshift_cfme/tasks/tune_masters.yml b/roles/openshift_cfme/tasks/tune_masters.yml new file mode 100644 index 000000000..02b0f10bf --- /dev/null +++ b/roles/openshift_cfme/tasks/tune_masters.yml @@ -0,0 +1,12 @@ +--- +- name: Ensure bulk image import limit is tuned +  yedit: +    src: /etc/origin/master/master-config.yaml +    key: 'imagePolicyConfig.maxImagesBulkImportedPerRepository' +    value: "{{ openshift_cfme_maxImagesBulkImportedPerRepository | int() }}" +    state: present +    backup: True +  notify: +    - restart master + +- meta: flush_handlers diff --git a/roles/openshift_cfme/tasks/uninstall.yml b/roles/openshift_cfme/tasks/uninstall.yml new file mode 100644 index 000000000..cba734a0e --- /dev/null +++ b/roles/openshift_cfme/tasks/uninstall.yml @@ -0,0 +1,43 @@ +--- +- include_role: +    name: lib_openshift + +- name: Uninstall CFME - ManageIQ +  debug: +    msg: Uninstalling Cloudforms Management Engine - ManageIQ + +- name: Ensure the CFME project is removed +  oc_project: +    state: absent +    name: "{{ openshift_cfme_project }}" + +- name: Ensure the CFME template is removed +  oc_obj: +    namespace: "{{ openshift_cfme_project }}" +    state: absent +    kind: template +    name: manageiq + +- name: Ensure the CFME PVs are removed +  oc_obj: +    state: absent +    all_namespaces: True +    kind: pv +    name: "{{ item }}" +  with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the CFME user is removed +  oc_user: +    state: absent +    username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME NFS Exports are removed +  file: +    path: /etc/exports.d/openshift_cfme.exports +    state: absent +  register: nfs_exports_removed + +- name: Ensure the NFS export table is refreshed if exports were removed +  command: exportfs -ar +  when: +    - nfs_exports_removed.changed diff --git a/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 new file mode 100644 index 000000000..b8c3bb277 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: miq-pv01 +spec: +  capacity: +    storage: 15Gi +  accessModes: +    - ReadWriteOnce +  nfs:  +    path: /exports/miq-pv01 +    server: {{ openshift_cfme_nfs_server }} +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 new file mode 100644 index 000000000..7218773f0 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: miq-pv02 +spec: +  capacity: +    storage: 5Gi +  accessModes: +    - ReadWriteOnce +  nfs:  +    path: /exports/miq-pv02 +    server: {{ openshift_cfme_nfs_server }} +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 new file mode 100644 index 000000000..7b40b6c69 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: miq-pv03 +spec: +  capacity: +    storage: 5Gi +  accessModes: +    - ReadWriteOnce +  nfs:  +    path: /exports/miq-pv03 +    server: {{ openshift_cfme_nfs_server }} +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml 
b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml index 4f25a9c8f..982bd9530 100644 --- a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml @@ -48,7 +48,7 @@ objects:      annotations:        description: "Keeps track of changes in the CloudForms app image"    spec: -    dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app +    dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-app  - apiVersion: v1    kind: PersistentVolumeClaim    metadata: @@ -188,7 +188,7 @@ objects:      annotations:        description: "Keeps track of changes in the CloudForms memcached image"    spec: -    dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached +    dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-memcached  - apiVersion: v1    kind: "DeploymentConfig"    metadata: @@ -272,7 +272,7 @@ objects:      annotations:        description: "Keeps track of changes in the CloudForms postgresql image"    spec: -    dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql +    dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-postgresql  - apiVersion: v1    kind: "DeploymentConfig"    metadata: diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml deleted file mode 100644 index 14bdd1dca..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: -  name: cloudforms -spec: -  capacity: -    storage: 2Gi -  accessModes: -    - ReadWriteOnce -  nfs: -    path: /opt/nfs/volumes-app -    server: 10.19.0.216 -  persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml new file mode 100644 index 000000000..250a99b8d --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: cfme-pv01 +spec: +  capacity: +    storage: 15Gi +  accessModes: +    - ReadWriteOnce +  nfs:  +    path: /exports/cfme-pv01 +    server: <your-nfs-host-here> +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml deleted file mode 100644 index 709d8d976..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: -  name: nfs-pv01 -spec: -  capacity: -    storage: 2Gi -  accessModes: -    - ReadWriteOnce -  nfs: -    path: /opt/nfs/volumes -    server: 10.19.0.216 -  persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml new file mode 100644 index 000000000..cba9bbe35 --- /dev/null +++ 
b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: cfme-pv02 +spec: +  capacity: +    storage: 5Gi +  accessModes: +    - ReadWriteOnce +  nfs:  +    path: /exports/cfme-pv02 +    server: <your-nfs-host-here> +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml new file mode 100644 index 000000000..c08c21265 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: cfme-pv03 +spec: +  capacity: +    storage: 5Gi +  accessModes: +    - ReadWriteOnce +  nfs:  +    path: /exports/cfme-pv03 +    server: <your-nfs-host-here> +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml index 4f25a9c8f..3bc6c5813 100644 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml @@ -17,6 +17,7 @@ objects:        service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]'      name: ${NAME}    spec: +    clusterIP: None      ports:      - name: http        port: 80 @@ -48,11 +49,27 @@ objects:      annotations:        description: "Keeps track of changes in the CloudForms app image"    spec: -    dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app +    dockerImageRepository: "${APPLICATION_IMG_NAME}" +- apiVersion: v1 +  kind: ImageStream +  metadata: +    name: cfme-openshift-postgresql +    annotations: +      description: "Keeps track of changes in the CloudForms postgresql image" +  spec: +    dockerImageRepository: "${POSTGRESQL_IMG_NAME}" +- apiVersion: v1 +  kind: ImageStream +  metadata: +    name: cfme-openshift-memcached +    annotations: +      description: "Keeps track of changes in the CloudForms memcached image" +  spec: +    dockerImageRepository: "${MEMCACHED_IMG_NAME}"  - apiVersion: v1    kind: PersistentVolumeClaim    metadata: -    name: ${DATABASE_SERVICE_NAME} +    name: "${NAME}-${DATABASE_SERVICE_NAME}"    spec:      accessModes:        - ReadWriteOnce @@ -62,45 +79,41 @@ objects:  - apiVersion: v1    kind: PersistentVolumeClaim    metadata: -    name: ${NAME} +    name: "${NAME}-region"    spec:      accessModes:        - ReadWriteOnce      resources:        requests: -        storage: ${APPLICATION_VOLUME_CAPACITY} -- apiVersion: v1 -  kind: "DeploymentConfig" +        storage: ${APPLICATION_REGION_VOLUME_CAPACITY} +- apiVersion: apps/v1beta1 +  kind: "StatefulSet"    metadata:      name: ${NAME}      annotations:        description: "Defines how to deploy the CloudForms appliance"    spec: +    serviceName: "${NAME}" +    replicas: 1      template:        metadata:          labels:            name: ${NAME}          name: ${NAME}        spec: -        volumes: -          - -            name: "cfme-app-volume" -            persistentVolumeClaim: -              claimName: ${NAME}          containers: -        - image: 
cloudforms/cfme-openshift-app:${APPLICATION_IMG_TAG} -          imagePullPolicy: IfNotPresent -          name: cloudforms +        - name: cloudforms +          image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}"            livenessProbe: -            httpGet: -              path: / -              port: 80 +            tcpSocket: +              port: 443              initialDelaySeconds: 480              timeoutSeconds: 3            readinessProbe:              httpGet:                path: / -              port: 80 +              port: 443 +              scheme: HTTPS              initialDelaySeconds: 200              timeoutSeconds: 3            ports: @@ -112,8 +125,11 @@ objects:              privileged: true            volumeMounts:                - -                name: "cfme-app-volume" +                name: "${NAME}-server"                  mountPath: "/persistent" +              - +                name: "${NAME}-region" +                mountPath: "/persistent-region"            env:              -                name: "APPLICATION_INIT_DELAY" @@ -144,29 +160,32 @@ objects:                value: "${POSTGRESQL_SHARED_BUFFERS}"            resources:              requests: -              memory: "${MEMORY_APPLICATION_MIN}" +              memory: "${APPLICATION_MEM_REQ}" +              cpu: "${APPLICATION_CPU_REQ}" +            limits: +              memory: "${APPLICATION_MEM_LIMIT}"            lifecycle:              preStop:                exec:                  command:                    - /opt/rh/cfme-container-scripts/sync-pv-data -    replicas: 1 -    selector: -      name: ${NAME} -    triggers: -      - type: "ConfigChange" -      - type: "ImageChange" -        imageChangeParams: -          automatic: true -          containerNames: -            - "cloudforms" -          from: -            kind: "ImageStreamTag" -            name: "cfme-openshift-app:${APPLICATION_IMG_TAG}" -    strategy: -      type: "Recreate" -      recreateParams: -        timeoutSeconds: 1200 +        volumes: +         - +           name: "${NAME}-region" +           persistentVolumeClaim: +             claimName: ${NAME}-region +    volumeClaimTemplates: +      - metadata: +          name: "${NAME}-server" +          annotations: +            # Uncomment this if using dynamic volume provisioning. 
+            # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html +            # volume.alpha.kubernetes.io/storage-class: anything +        spec: +          accessModes: [ ReadWriteOnce ] +          resources: +            requests: +              storage: "${APPLICATION_VOLUME_CAPACITY}"  - apiVersion: v1    kind: "Service"    metadata: @@ -182,14 +201,6 @@ objects:      selector:        name: "${MEMCACHED_SERVICE_NAME}"  - apiVersion: v1 -  kind: ImageStream -  metadata: -    name: cfme-openshift-memcached -    annotations: -      description: "Keeps track of changes in the CloudForms memcached image" -  spec: -    dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached -- apiVersion: v1    kind: "DeploymentConfig"    metadata:      name: "${MEMCACHED_SERVICE_NAME}" @@ -223,7 +234,7 @@ objects:          containers:            -              name: "memcached" -            image: "cloudforms/cfme-openshift-memcached:${MEMCACHED_IMG_TAG}" +            image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}"              ports:                -                  containerPort: 11211 @@ -249,8 +260,11 @@ objects:                  name: "MEMCACHED_SLAB_PAGE_SIZE"                  value: "${MEMCACHED_SLAB_PAGE_SIZE}"              resources: +              requests: +                memory: "${MEMCACHED_MEM_REQ}" +                cpu: "${MEMCACHED_CPU_REQ}"                limits: -                memory: "${MEMORY_MEMCACHED_LIMIT}" +                memory: "${MEMCACHED_MEM_LIMIT}"  - apiVersion: v1    kind: "Service"    metadata: @@ -266,14 +280,6 @@ objects:      selector:        name: "${DATABASE_SERVICE_NAME}"  - apiVersion: v1 -  kind: ImageStream -  metadata: -    name: cfme-openshift-postgresql -    annotations: -      description: "Keeps track of changes in the CloudForms postgresql image" -  spec: -    dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql -- apiVersion: v1    kind: "DeploymentConfig"    metadata:      name: "${DATABASE_SERVICE_NAME}" @@ -307,11 +313,11 @@ objects:            -              name: "cfme-pgdb-volume"              persistentVolumeClaim: -              claimName: ${DATABASE_SERVICE_NAME} +              claimName: "${NAME}-${DATABASE_SERVICE_NAME}"          containers:            -              name: "postgresql" -            image: "cloudforms/cfme-openshift-postgresql:${POSTGRESQL_IMG_TAG}" +            image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}"              ports:                -                  containerPort: 5432 @@ -350,8 +356,11 @@ objects:                  name: "POSTGRESQL_SHARED_BUFFERS"                  value: "${POSTGRESQL_SHARED_BUFFERS}"              resources: +              requests: +                memory: "${POSTGRESQL_MEM_REQ}" +                cpu: "${POSTGRESQL_CPU_REQ}"                limits: -                memory: "${MEMORY_POSTGRESQL_LIMIT}" +                memory: "${POSTGRESQL_MEM_LIMIT}"  parameters:    - @@ -420,36 +429,87 @@ parameters:      name: "POSTGRESQL_SHARED_BUFFERS"      displayName: "PostgreSQL Shared Buffer Amount"      description: "Amount of memory dedicated for PostgreSQL shared memory buffers." 
-    value: "64MB" +    value: "256MB"    - -    name: "MEMORY_APPLICATION_MIN" -    displayName: "Application Memory Minimum" +    name: "APPLICATION_CPU_REQ" +    displayName: "Application Min CPU Requested" +    required: true +    description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." +    value: "1000m" +  - +    name: "POSTGRESQL_CPU_REQ" +    displayName: "PostgreSQL Min CPU Requested" +    required: true +    description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." +    value: "500m" +  - +    name: "MEMCACHED_CPU_REQ" +    displayName: "Memcached Min CPU Requested" +    required: true +    description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." +    value: "200m" +  - +    name: "APPLICATION_MEM_REQ" +    displayName: "Application Min RAM Requested"      required: true      description: "Minimum amount of memory the Application container will need." -    value: "4096Mi" +    value: "6144Mi" +  - +    name: "POSTGRESQL_MEM_REQ" +    displayName: "PostgreSQL Min RAM Requested" +    required: true +    description: "Minimum amount of memory the PostgreSQL container will need." +    value: "1024Mi"    - -    name: "MEMORY_POSTGRESQL_LIMIT" -    displayName: "PostgreSQL Memory Limit" +    name: "MEMCACHED_MEM_REQ" +    displayName: "Memcached Min RAM Requested"      required: true -    description: "Maximum amount of memory the PostgreSQL container can use." -    value: "2048Mi" +    description: "Minimum amount of memory the Memcached container will need." +    value: "64Mi"    - -    name: "MEMORY_MEMCACHED_LIMIT" -    displayName: "Memcached Memory Limit" +    name: "APPLICATION_MEM_LIMIT" +    displayName: "Application Max RAM Limit"      required: true -    description: "Maximum amount of memory the Memcached container can use." +    description: "Maximum amount of memory the Application container can consume." +    value: "16384Mi" +  - +    name: "POSTGRESQL_MEM_LIMIT" +    displayName: "PostgreSQL Max RAM Limit" +    required: true +    description: "Maximum amount of memory the PostgreSQL container can consume." +    value: "8192Mi" +  - +    name: "MEMCACHED_MEM_LIMIT" +    displayName: "Memcached Max RAM Limit" +    required: true +    description: "Maximum amount of memory the Memcached container can consume."      value: "256Mi"    - +    name: "POSTGRESQL_IMG_NAME" +    displayName: "PostgreSQL Image Name" +    description: "This is the PostgreSQL image name requested to deploy." +    value: "registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql" +  -      name: "POSTGRESQL_IMG_TAG"      displayName: "PostgreSQL Image Tag"      description: "This is the PostgreSQL image tag/version requested to deploy."      value: "latest"    - +    name: "MEMCACHED_IMG_NAME" +    displayName: "Memcached Image Name" +    description: "This is the Memcached image name requested to deploy." +    value: "registry.access.redhat.com/cloudforms45/cfme-openshift-memcached" +  -      name: "MEMCACHED_IMG_TAG"      displayName: "Memcached Image Tag"      description: "This is the Memcached image tag/version requested to deploy."      value: "latest"    - +    name: "APPLICATION_IMG_NAME" +    displayName: "Application Image Name" +    description: "This is the Application image name requested to deploy." 
+    value: "registry.access.redhat.com/cloudforms45/cfme-openshift-app" +  -      name: "APPLICATION_IMG_TAG"      displayName: "Application Image Tag"      description: "This is the Application image tag/version requested to deploy." @@ -464,16 +524,22 @@ parameters:      displayName: "Application Init Delay"      required: true      description: "Delay in seconds before we attempt to initialize the application." -    value: "30" +    value: "15"    -      name: "APPLICATION_VOLUME_CAPACITY"      displayName: "Application Volume Capacity"      required: true      description: "Volume space available for application data." -    value: "1Gi" +    value: "5Gi" +  - +    name: "APPLICATION_REGION_VOLUME_CAPACITY" +    displayName: "Application Region Volume Capacity" +    required: true +    description: "Volume space available for region application data." +    value: "5Gi"    -      name: "DATABASE_VOLUME_CAPACITY"      displayName: "Database Volume Capacity"      required: true      description: "Volume space available for database." -    value: "1Gi" +    value: "15Gi" diff --git a/roles/openshift_excluder/tasks/install.yml b/roles/openshift_excluder/tasks/install.yml index d09358bee..3a866cedf 100644 --- a/roles/openshift_excluder/tasks/install.yml +++ b/roles/openshift_excluder/tasks/install.yml @@ -1,14 +1,24 @@  --- -- name: Install docker excluder -  package: -    name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) +  '*' }}" -    state: "{{ r_openshift_excluder_docker_package_state }}" -  when: -  - r_openshift_excluder_enable_docker_excluder | bool - -- name: Install openshift excluder -  package: -    name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" -    state: "{{ r_openshift_excluder_package_state }}" -  when: -  - r_openshift_excluder_enable_openshift_excluder | bool + +- when: +  - not openshift.common.is_atomic | bool +  - r_openshift_excluder_install_ran is not defined + +  block: + +  - name: Install docker excluder +    package: +      name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) +  '*' }}" +      state: "{{ r_openshift_excluder_docker_package_state }}" +    when: +    - r_openshift_excluder_enable_docker_excluder | bool + +  - name: Install openshift excluder +    package: +      name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" +      state: "{{ r_openshift_excluder_package_state }}" +    when: +    - r_openshift_excluder_enable_openshift_excluder | bool + +  - set_fact: +      r_openshift_excluder_install_ran: True diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index 1b9bda67e..50ed3e964 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -24,12 +24,18 @@      msg: |        openshift-ansible requires Python 3 for {{ ansible_distribution }};        For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html -  when: ansible_distribution == 'Fedora' and ansible_python['version']['major'] != 3 +  when: +  - ansible_distribution == 'Fedora' +  - ansible_python['version']['major'] != 3 +  - r_openshift_facts_ran is not defined  - name: 
Validate python version    fail:      msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}" -  when: ansible_distribution != 'Fedora' and ansible_python['version']['major'] != 2 +  when: +  - ansible_distribution != 'Fedora' +  - ansible_python['version']['major'] != 2 +  - r_openshift_facts_ran is not defined  # Fail as early as possible if Atomic and old version of Docker  - block: @@ -48,7 +54,9 @@        that:        - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=') -  when: l_is_atomic | bool +  when: +  - l_is_atomic | bool +  - r_openshift_facts_ran is not defined  - name: Load variables    include_vars: "{{ item }}" @@ -59,7 +67,9 @@  - name: Ensure various deps are installed    package: name={{ item }} state=present    with_items: "{{ required_packages }}" -  when: not l_is_atomic | bool +  when: +  - not l_is_atomic | bool +  - r_openshift_facts_ran is not defined  - name: Ensure various deps for running system containers are installed    package: name={{ item }} state=present @@ -67,6 +77,7 @@    when:    - not l_is_atomic | bool    - l_any_system_container | bool +  - r_openshift_facts_ran is not defined  - name: Gather Cluster facts and set is_containerized if needed    openshift_facts: @@ -99,3 +110,7 @@  - name: Set repoquery command    set_fact:      repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}" + +- name: Register that this already ran +  set_fact: +    r_openshift_facts_ran: True diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 27e6fe383..60aacf715 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,24 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" +  from openshift_checks import OpenShiftCheck, get_var  from openshift_checks.mixins import DockerHostMixin +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { +    "origin": { +        "namespace": "openshift", +        "name": "origin", +        "registry_console_image": "cockpit/kubernetes", +    }, +    "openshift-enterprise": { +        "namespace": "openshift3", +        "name": "ose", +        "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", +    }, +} + +  class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):      """Check that required Docker images are available. 
@@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):      name = "docker_image_availability"      tags = ["preflight"] -      dependencies = ["skopeo", "python-docker-py"] -    deployment_image_info = { -        "origin": { -            "namespace": "openshift", -            "name": "origin", -        }, -        "openshift-enterprise": { -            "namespace": "openshift3", -            "name": "ose", -        }, -    } -      @classmethod      def is_active(cls, task_vars):          """Skip hosts with unsupported deployment types."""          deployment_type = get_var(task_vars, "openshift_deployment_type") -        has_valid_deployment_type = deployment_type in cls.deployment_image_info +        has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO          return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type @@ -70,51 +74,55 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):          return {"changed": changed} -    def required_images(self, task_vars): -        deployment_type = get_var(task_vars, "openshift_deployment_type") -        image_info = self.deployment_image_info[deployment_type] - -        openshift_release = get_var(task_vars, "openshift_release", default="latest") -        openshift_image_tag = get_var(task_vars, "openshift_image_tag") -        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - -        images = set(self.required_docker_images( -            image_info["namespace"], -            image_info["name"], -            ["registry-console"] if "enterprise" in deployment_type else [],  # include enterprise-only image names -            openshift_release, -            is_containerized, -        )) - -        # append images with qualified image tags to our list of required images. -        # these are images with a (v0.0.0.0) tag, rather than a standard release -        # format tag (v0.0). We want to check this set in both containerized and -        # non-containerized installations. -        images.update( -            self.required_qualified_docker_images( -                image_info["namespace"], -                image_info["name"], -                openshift_image_tag, -            ), -        ) - -        return images -      @staticmethod -    def required_docker_images(namespace, name, additional_image_names, version, is_containerized): -        if is_containerized: -            return ["{}/{}:{}".format(namespace, name, version)] if name else [] - -        # include additional non-containerized images specific to the current deployment type -        return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - -    @staticmethod -    def required_qualified_docker_images(namespace, name, version): -        # pylint: disable=invalid-name -        return [ -            "{}/{}-{}:{}".format(namespace, name, suffix, version) -            for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] -        ] +    def required_images(task_vars): +        """ +        Determine which images we expect to need for this host. +        Returns: a set of required images like 'openshift/origin:v3.6' + +        The thorny issue of determining the image names from the variables is under consideration +        via https://github.com/openshift/openshift-ansible/issues/4415 + +        For now we operate as follows: +        * For containerized components (master, node, ...) 
we look at the deployment type and +          use openshift/origin or openshift3/ose as the base for those component images. The +          version is openshift_image_tag as determined by the openshift_version role. +        * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if +          it is defined; otherwise we again use the base that depends on the deployment type. +        Registry is not included in constructed images. It may be in oreg_url or etcd image. +        """ +        required = set() +        deployment_type = get_var(task_vars, "openshift_deployment_type") +        host_groups = get_var(task_vars, "group_names") +        image_tag = get_var(task_vars, "openshift_image_tag") +        image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] +        if not image_info: +            return required + +        # template for images that run on top of OpenShift +        image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") +        image_url = get_var(task_vars, "oreg_url", default="") or image_url +        if 'nodes' in host_groups: +            for suffix in NODE_IMAGE_SUFFIXES: +                required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) +            # The registry-console is for some reason not prefixed with ose- like the other components. +            # Nor is it versioned the same, so just look for latest. +            # Also a completely different name is used for Origin. +            required.add(image_info["registry_console_image"]) + +        # images for containerized components +        if get_var(task_vars, "openshift", "common", "is_containerized"): +            components = set() +            if 'nodes' in host_groups: +                components.update(["node", "openvswitch"]) +            if 'masters' in host_groups:  # name is "origin" or "ose" +                components.add(image_info["name"]) +            for component in components: +                required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) +            if 'etcd' in host_groups:  # special case, note it is the same for origin/enterprise +                required.add("registry.access.redhat.com/rhel7/etcd")  # and no image tag + +        return required      def local_images(self, images, task_vars):          """Filter a list of images and return those available locally.""" @@ -124,7 +132,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):          ]      def is_image_local(self, image, task_vars): -        result = self.module_executor("docker_image_facts", {"name": image}, task_vars) +        """Check if image is already in local docker index.""" +        result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars)          if result.get("failed", False):              return False @@ -132,6 +141,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):      @staticmethod      def known_docker_registries(task_vars): +        """Build a list of docker registries available according to inventory vars."""          docker_facts = get_var(task_vars, "openshift", "docker")          regs = set(docker_facts["additional_registries"]) @@ -147,17 +157,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):          """Inspect existing images using Skopeo and return all images successfully inspected."""          return [              image for image in images -            if 
any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) +            if self.is_available_skopeo_image(image, registries, task_vars)          ] -    def is_available_skopeo_image(self, image, registry, task_vars): -        """Uses Skopeo to determine if required image exists in a given registry.""" +    def is_available_skopeo_image(self, image, registries, task_vars): +        """Use Skopeo to determine if required image exists in known registry(s).""" + +        # if the image already includes a registry, just use that +        if image.count("/") > 1: +            registry, image = image.split("/", 1) +            registries = [registry] -        cmd_str = "skopeo inspect docker://{registry}/{image}".format( -            registry=registry, -            image=image, -        ) +        for registry in registries: +            args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)} +            result = self.execute_module("command", args, task_vars=task_vars) +            if result.get("rc", 0) == 0 and not result.get("failed"): +                return True -        args = {"_raw_params": cmd_str} -        result = self.module_executor("command", args, task_vars) -        return not result.get("failed", False) and result.get("rc", 0) == 0 +        return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 7f1751b36..2bd615457 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -34,7 +34,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):              }          # attempt to get the docker info hash from the API -        info = self.execute_module("docker_info", {}, task_vars) +        info = self.execute_module("docker_info", {}, task_vars=task_vars)          if info.get("failed"):              return {"failed": True, "changed": changed,                      "msg": "Failed to query Docker API. Is docker running on this host?"} @@ -146,7 +146,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):          vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name          # should return free space like "  12.00g" if the VG exists; empty if it does not -        ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars) +        ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars)          if ret.get("failed") or ret.get("rc", 0) != 0:              raise OpenShiftCheckException(                  "Is LVM installed? Failed to run /sbin/vgs " diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 7f3d78cc4..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -40,8 +40,11 @@ class DockerHostMixin(object):          # NOTE: we would use the "package" module but it's actually an action plugin          # and it's not clear how to invoke one of those. 
This is about the same anyway: -        pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum") -        result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars) +        result = self.execute_module( +            get_var(task_vars, "ansible_pkg_mgr", default="yum"), +            {"name": self.dependencies, "state": "present"}, +            task_vars=task_vars, +        )          msg = result.get("msg", "")          if result.get("failed"):              if "No package matching" in msg: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):                  },              ],          } -        return self.execute_module("rpm_version", args, task_vars) +        return self.execute_module("rpm_version", args, task_vars=task_vars)      def get_required_ovs_version(self, task_vars):          """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..0dd2b1286 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):              packages.update(self.node_packages(rpm_prefix))          args = {"packages": sorted(set(packages))} -        return self.execute_module("check_yum_update", args, tmp, task_vars) +        return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars)      @staticmethod      def master_packages(rpm_prefix): @@ -36,8 +36,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):              "bash-completion",              "cockpit-bridge",              "cockpit-docker", -            "cockpit-kubernetes", -            "cockpit-shell", +            "cockpit-system",              "cockpit-ws",              "etcd",              "httpd-tools", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):      def run(self, tmp, task_vars):          args = {"packages": []} -        return self.execute_module("check_yum_update", args, tmp, task_vars) +        return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..6a76bb93d 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):              ],          } -        return self.execute_module("aos_version", args, tmp, task_vars) +        
return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars)      def get_required_ovs_version(self, task_vars):          """Return the correct Open vSwitch version for the current OpenShift version. diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 197c65f51..0a7c0f8d3 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active      (False, True),  ])  def test_all_images_available_locally(is_containerized, is_atomic): -    def execute_module(module_name, args, task_vars): +    def execute_module(module_name, module_args, task_vars):          if module_name == "yum":              return {"changed": True}          assert module_name == "docker_image_facts" -        assert 'name' in args -        assert args['name'] +        assert 'name' in module_args +        assert module_args['name']          return { -            'images': [args['name']], +            'images': [module_args['name']],          }      result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic):              docker=dict(additional_registries=["docker.io"]),          ),          openshift_deployment_type='origin', -        openshift_release='v3.4',          openshift_image_tag='3.4', +        group_names=['nodes', 'masters'],      ))      assert not result.get('failed', False) @@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic):      True,  ])  def test_all_images_available_remotely(available_locally): -    def execute_module(module_name, args, task_vars): +    def execute_module(module_name, module_args, task_vars):          if module_name == 'docker_image_facts':              return {'images': [], 'failed': available_locally}          return {'changed': False} @@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally):              docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]),          ),          openshift_deployment_type='origin', -        openshift_release='3.4',          openshift_image_tag='v3.4', +        group_names=['nodes', 'masters'],      ))      assert not result.get('failed', False) @@ -108,8 +108,8 @@ def test_all_images_unavailable():              docker=dict(additional_registries=["docker.io"]),          ),          openshift_deployment_type="openshift-enterprise", -        openshift_release=None, -        openshift_image_tag='latest' +        openshift_image_tag='latest', +        group_names=['nodes', 'masters'],      ))      assert actual['failed'] @@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words):              docker=dict(additional_registries=["unknown.io"]),          ),          openshift_deployment_type="openshift-enterprise", -        openshift_release='',          openshift_image_tag='', +        group_names=['nodes', 'masters'],      ))      assert actual["failed"] @@ -177,8 +177,85 @@ def test_registry_availability(deployment_type, registries):              docker=dict(additional_registries=registries),          ),          openshift_deployment_type=deployment_type, -        openshift_release='',          openshift_image_tag='', +        
group_names=['nodes', 'masters'],      ))      assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ +    (  # standard set of stuff required on nodes +        "origin", False, ['nodes'], None, +        set([ +            'openshift/origin-pod:vtest', +            'openshift/origin-deployer:vtest', +            'openshift/origin-docker-registry:vtest', +            'openshift/origin-haproxy-router:vtest', +            'cockpit/kubernetes',  # origin version of registry-console +        ]) +    ), +    (  # set a different URL for images +        "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', +        set([ +            'foo.io/openshift/origin-pod:vtest', +            'foo.io/openshift/origin-deployer:vtest', +            'foo.io/openshift/origin-docker-registry:vtest', +            'foo.io/openshift/origin-haproxy-router:vtest', +            'cockpit/kubernetes',  # AFAICS this is not built from the URL +        ]) +    ), +    ( +        "origin", True, ['nodes', 'masters', 'etcd'], None, +        set([ +            # images running on top of openshift +            'openshift/origin-pod:vtest', +            'openshift/origin-deployer:vtest', +            'openshift/origin-docker-registry:vtest', +            'openshift/origin-haproxy-router:vtest', +            'cockpit/kubernetes', +            # containerized component images +            'openshift/origin:vtest', +            'openshift/node:vtest', +            'openshift/openvswitch:vtest', +            'registry.access.redhat.com/rhel7/etcd', +        ]) +    ), +    (  # enterprise images +        "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', +        set([ +            'foo.io/openshift3/ose-pod:f13ac45', +            'foo.io/openshift3/ose-deployer:f13ac45', +            'foo.io/openshift3/ose-docker-registry:f13ac45', +            'foo.io/openshift3/ose-haproxy-router:f13ac45', +            # registry-console is not constructed/versioned the same as the others. 
+            'registry.access.redhat.com/openshift3/registry-console', +            # containerized images aren't built from oreg_url +            'openshift3/node:vtest', +            'openshift3/openvswitch:vtest', +        ]) +    ), +    ( +        "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', +        set([ +            'registry.access.redhat.com/rhel7/etcd', +            # lb does not yet come in a containerized version +        ]) +    ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): +    task_vars = dict( +        openshift=dict( +            common=dict( +                is_containerized=is_containerized, +                is_atomic=False, +            ), +        ), +        openshift_deployment_type=deployment_type, +        group_names=groups, +        oreg_url=oreg_url, +        openshift_image_tag='vtest', +    ) + +    assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 292a323db..876614b1d 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -77,7 +77,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}      ),  ])  def test_check_storage_driver(docker_info, failed, expect_msg): -    def execute_module(module_name, args, tmp=None, task_vars=None): +    def execute_module(module_name, module_args, tmp=None, task_vars=None):          if module_name == "yum":              return {}          if module_name != "docker_info": @@ -187,7 +187,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):      )  ])  def test_vg_free(pool, command_returns, raises, returns): -    def execute_module(module_name, args, tmp=None, task_vars=None): +    def execute_module(module_name, module_args, tmp=None, task_vars=None):          if module_name != "command":              raise ValueError("not expecting module " + module_name)          return command_returns diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 751489958..d895e9a68 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -124,6 +124,35 @@      edits: "{{ openshift_hosted_registry_edits }}"      force: "{{ True|bool in openshift_hosted_registry_force }}" +- name: Ensure OpenShift registry correctly rolls out (best-effort today) +  command: | +    oc rollout status deploymentconfig {{ openshift_hosted_registry_name }} \ +                      --namespace {{ openshift_hosted_registry_namespace }} \ +                      --config {{ openshift.common.config_base }}/master/admin.kubeconfig +  async: 600 +  poll: 15 +  failed_when: false + +- name: Determine the latest version of the OpenShift registry deployment +  command: | +    oc get deploymentconfig {{ openshift_hosted_registry_name }} \ +           --namespace {{ openshift_hosted_registry_namespace }} \ +           --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ +           -o jsonpath='{ .status.latestVersion }' +  register: openshift_hosted_registry_latest_version + +- name: Sanity-check that the OpenShift registry rolled out correctly +  command: | +    oc get replicationcontroller {{ openshift_hosted_registry_name }}-{{ 
openshift_hosted_registry_latest_version.stdout }} \ +           --namespace {{ openshift_hosted_registry_namespace }} \ +           --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ +           -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' +  register: openshift_hosted_registry_rc_phase +  until: "'Running' not in openshift_hosted_registry_rc_phase.stdout" +  delay: 15 +  retries: 40 +  failed_when: "'Failed' in openshift_hosted_registry_rc_phase.stdout" +  - include: storage/glusterfs.yml    when:    - openshift.hosted.registry.storage.kind | default(none) == 'glusterfs' or openshift.hosted.registry.storage.glusterfs.swap diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 192afc87a..160ae2f5e 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -55,7 +55,7 @@      state: present    with_items: "{{ openshift_hosted_routers }}" -- name: Grant the router serivce account(s) access to the appropriate scc +- name: Grant the router service account(s) access to the appropriate scc    oc_adm_policy_user:      user: "system:serviceaccount:{{ item.namespace }}:{{ item.serviceaccount }}"      namespace: "{{ item.namespace }}" @@ -89,18 +89,37 @@      ports: "{{ item.ports }}"      stats_port: "{{ item.stats_port }}"    with_items: "{{ openshift_hosted_routers }}" -  register: routerout -# This should probably move to module -- name: wait for deploy -  pause: -    seconds: 30 -  when: routerout.changed +- name: Ensure OpenShift router correctly rolls out (best-effort today) +  command: | +    oc rollout status deploymentconfig {{ item.name }} \ +                      --namespace {{ item.namespace | default('default') }} \ +                      --config {{ openshift.common.config_base }}/master/admin.kubeconfig +  async: 600 +  poll: 15 +  with_items: "{{ openshift_hosted_routers }}" +  failed_when: false -- name: Ensure router replica count matches desired -  oc_scale: -    kind: dc -    name: "{{ item.name | default('router') }}" -    namespace: "{{ item.namespace | default('default') }}" -    replicas: "{{ item.replicas }}" +- name: Determine the latest version of the OpenShift router deployment +  command: | +    oc get deploymentconfig {{ item.name }} \ +           --namespace {{ item.namespace }} \ +           --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ +           -o jsonpath='{ .status.latestVersion }' +  register: openshift_hosted_routers_latest_version    with_items: "{{ openshift_hosted_routers }}" + +- name: Poll for OpenShift router deployment success +  command: | +    oc get replicationcontroller {{ item.0.name }}-{{ item.1.stdout }} \ +           --namespace {{ item.0.namespace }} \ +           --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ +           -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' +  register: openshift_hosted_router_rc_phase +  until: "'Running' not in openshift_hosted_router_rc_phase.stdout" +  delay: 15 +  retries: 40 +  failed_when: "'Failed' in openshift_hosted_router_rc_phase.stdout" +  with_together: +  - "{{ openshift_hosted_routers }}" +  - "{{ openshift_hosted_routers_latest_version.results }}" diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md index 0c60ef6fd..dd0f22d4b 100644 --- a/roles/openshift_logging/README.md +++ b/roles/openshift_logging/README.md @@ -55,6 +55,9 @@ When both 
`openshift_logging_install_logging` and `openshift_logging_upgrade_log  - `openshift_logging_fluentd_use_journal`: NOTE: Fluentd will attempt to detect whether or not Docker is using the journald log driver when using the default of empty.  - `openshift_logging_fluentd_journal_read_from_head`: If empty, Fluentd will use its internal default, which is false.  - `openshift_logging_fluentd_hosts`: List of nodes that should be labeled for Fluentd to be deployed to. Defaults to ['--all']. +- `openshift_logging_fluentd_buffer_queue_limit`: Buffer queue limit for Fluentd. Defaults to 1024. +- `openshift_logging_fluentd_buffer_size_limit`: Buffer chunk limit for Fluentd. Defaults to 1m. +  - `openshift_logging_es_host`: The name of the ES service Fluentd should send logs to. Defaults to 'logging-es'.  - `openshift_logging_es_port`: The port for the ES service Fluentd should sent its logs to. Defaults to '9200'. @@ -155,3 +158,5 @@ Elasticsearch OPS too, if using an OPS cluster:  - `openshift_logging_mux_namespaces`: Default `[]` - additional namespaces to    create for _external_ mux clients to associate with their logs - users will    need to set this +- `openshift_logging_mux_buffer_queue_limit`: Default `[1024]` - Buffer queue limit for Mux. +- `openshift_logging_mux_buffer_size_limit`: Default `[1m]` - Buffer chunk limit for Mux. diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml index 3c343c9dc..66d880d23 100644 --- a/roles/openshift_logging/defaults/main.yml +++ b/roles/openshift_logging/defaults/main.yml @@ -76,6 +76,8 @@ openshift_logging_fluentd_use_journal: "{{ openshift_hosted_logging_use_journal  openshift_logging_fluentd_journal_source: "{{ openshift_hosted_logging_journal_source | default('') }}"  openshift_logging_fluentd_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}"  openshift_logging_fluentd_hosts: ['--all'] +openshift_logging_fluentd_buffer_queue_limit: 1024 +openshift_logging_fluentd_buffer_size_limit: 1m  openshift_logging_es_host: logging-es  openshift_logging_es_port: 9200 diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index 7c1062b77..66dc0e096 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -119,6 +119,12 @@      openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}"      openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}"      openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" +    openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" +    openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" +    openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" +    openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" +    openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" +    openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}"    with_together:    - "{{ openshift_logging_facts.elasticsearch_ops.deploymentconfigs }}" @@ -141,6 +147,12 @@      openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}"      openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}"      openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" +    
openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" +    openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" +    openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" +    openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" +    openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" +    openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}"    with_sequence: count={{ openshift_logging_es_ops_cluster_size | int - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count }}    when: diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index f1d15b76d..684dbe0a0 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -269,6 +269,75 @@      - "{{ tempdir }}/templates/logging-es-dc.yml"      delete_after: true +- name: Retrieving the cert to use when generating secrets for the {{ es_component }} component +  slurp: +    src: "{{ generated_certs_dir }}/{{ item.file }}" +  register: key_pairs +  with_items: +  - { name: "ca_file", file: "ca.crt" } +  - { name: "es_key", file: "system.logging.es.key" } +  - { name: "es_cert", file: "system.logging.es.crt" } +  when: openshift_logging_es_allow_external | bool + +- set_fact: +    es_key: "{{ lookup('file', openshift_logging_es_key) | b64encode }}" +  when: +  - openshift_logging_es_key | trim | length > 0 +  - openshift_logging_es_allow_external | bool +  changed_when: false + +- set_fact: +    es_cert: "{{ lookup('file', openshift_logging_es_cert) | b64encode  }}" +  when: +  - openshift_logging_es_cert | trim | length > 0 +  - openshift_logging_es_allow_external | bool +  changed_when: false + +- set_fact: +    es_ca: "{{ lookup('file', openshift_logging_es_ca_ext) | b64encode  }}" +  when: +  - openshift_logging_es_ca_ext | trim | length > 0 +  - openshift_logging_es_allow_external | bool +  changed_when: false + +- set_fact: +    es_ca: "{{ key_pairs | entry_from_named_pair('ca_file') }}" +  when: +  - es_ca is not defined +  - openshift_logging_es_allow_external | bool +  changed_when: false + +- name: Generating Elasticsearch {{ es_component }} route template +  template: +    src: route_reencrypt.j2 +    dest: "{{mktemp.stdout}}/templates/logging-{{ es_component }}-route.yaml" +  vars: +    obj_name: "logging-{{ es_component }}" +    route_host: "{{ openshift_logging_es_hostname }}" +    service_name: "logging-{{ es_component }}" +    tls_key: "{{ es_key | default('') | b64decode }}" +    tls_cert: "{{ es_cert | default('') | b64decode }}" +    tls_ca_cert: "{{ es_ca | b64decode }}" +    tls_dest_ca_cert: "{{ key_pairs | entry_from_named_pair('ca_file') | b64decode }}" +    edge_term_policy: "{{ openshift_logging_es_edge_term_policy | default('') }}" +    labels: +      component: support +      logging-infra: support +      provider: openshift +  changed_when: no +  when: openshift_logging_es_allow_external | bool + +# This currently has an issue if the host name changes +- name: Setting Elasticsearch {{ es_component }} route +  oc_obj: +    state: present +    name: "logging-{{ es_component }}" +    namespace: "{{ openshift_logging_elasticsearch_namespace }}" +    kind: route +    files: +    - "{{ tempdir }}/templates/logging-{{ es_component }}-route.yaml" +  when: openshift_logging_es_allow_external | bool +  ## Placeholder for migration when 
necessary ##  - name: Delete temp directory diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 index 377abe21f..38948ba2f 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 @@ -35,6 +35,12 @@ appender:      layout:        type: consolePattern        conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" +    # need this filter until https://github.com/openshift/origin/issues/14515 is fixed +    filter: +      1: +        type: org.apache.log4j.varia.StringMatchFilter +        StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" +        AcceptOnMatch: false    file:      type: dailyRollingFile @@ -43,6 +49,12 @@ appender:      layout:        type: pattern        conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" +    # need this filter until https://github.com/openshift/origin/issues/14515 is fixed +    filter: +      1: +        type: org.apache.log4j.varia.StringMatchFilter +        StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" +        AcceptOnMatch: false    # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files.    # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 index 58c325c8a..409e564c2 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 @@ -16,6 +16,7 @@ index:  node:    master: ${IS_MASTER}    data: ${HAS_DATA} +  max_local_storage_nodes: 1  network:    host: 0.0.0.0 diff --git a/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 new file mode 100644 index 000000000..cf8a9e65f --- /dev/null +++ b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 @@ -0,0 +1,36 @@ +apiVersion: "v1" +kind: "Route" +metadata: +  name: "{{obj_name}}" +{% if labels is defined%} +  labels: +{% for key, value in labels.iteritems() %} +    {{key}}: {{value}} +{% endfor %} +{% endif %} +spec: +  host: {{ route_host }} +  tls: +{% if tls_key is defined and tls_key | length > 0 %} +    key: | +{{ tls_key|indent(6, true) }} +{% if tls_cert is defined and tls_cert | length > 0 %} +    certificate: | +{{ tls_cert|indent(6, true) }} +{% endif %} +{% endif %} +    caCertificate: | +{% for line in tls_ca_cert.split('\n') %} +      {{ line }} +{% endfor %} +    destinationCACertificate: | +{% for line in tls_dest_ca_cert.split('\n') %} +      {{ line }} +{% endfor %} +    termination: reencrypt +{% if edge_term_policy is defined and edge_term_policy | length > 0 %} +    insecureEdgeTerminationPolicy: {{ edge_term_policy }} +{% endif %} +  to: +    kind: Service +    name: {{ service_name }} diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index e185938e3..a5695ee26 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -93,6 +93,14 @@ spec:            value: "{{ 
openshift_logging_fluentd_journal_source | default('') }}"          - name: "JOURNAL_READ_FROM_HEAD"            value: "{{ openshift_logging_fluentd_journal_read_from_head | lower }}" +        - name: "BUFFER_QUEUE_LIMIT" +          value: "{{ openshift_logging_fluentd_buffer_queue_limit }}" +        - name: "BUFFER_SIZE_LIMIT" +          value: "{{ openshift_logging_fluentd_buffer_size_limit }}" +        - name: "FLUENTD_CPU_LIMIT" +          value: "{{ openshift_logging_fluentd_cpu_limit }}" +        - name: "FLUENTD_MEMORY_LIMIT" +          value: "{{ openshift_logging_fluentd_memory_limit }}"        volumes:        - name: runlogjournal          hostPath: diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml index 10fa4372c..77e47d38c 100644 --- a/roles/openshift_logging_mux/defaults/main.yml +++ b/roles/openshift_logging_mux/defaults/main.yml @@ -10,7 +10,9 @@ openshift_logging_mux_namespace: logging  ### Common settings  openshift_logging_mux_nodeselector: "{{ openshift_hosted_logging_mux_nodeselector_label | default('') | map_from_pairs }}"  openshift_logging_mux_cpu_limit: 500m -openshift_logging_mux_memory_limit: 1Gi +openshift_logging_mux_memory_limit: 2Gi +openshift_logging_mux_buffer_queue_limit: 1024 +openshift_logging_mux_buffer_size_limit: 1m  openshift_logging_mux_replicas: 1 diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 502cd3347..243698c6a 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -103,6 +103,14 @@ spec:            value: "true"          - name: MUX_ALLOW_EXTERNAL            value: "{{ openshift_logging_mux_allow_external | default('false') }}" +        - name: "BUFFER_QUEUE_LIMIT" +          value: "{{ openshift_logging_mux_buffer_queue_limit }}" +        - name: "BUFFER_SIZE_LIMIT" +          value: "{{ openshift_logging_mux_buffer_size_limit }}" +        - name: "MUX_CPU_LIMIT" +          value: "{{ openshift_logging_mux_cpu_limit }}" +        - name: "MUX_MEMORY_LIMIT" +          value: "{{ openshift_logging_mux_memory_limit }}"        volumes:        - name: config          configMap: diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml index 14a1daf6c..6a082d71a 100644 --- a/roles/openshift_master/defaults/main.yml +++ b/roles/openshift_master/defaults/main.yml @@ -1,4 +1,3 @@  ---  openshift_node_ips: [] -# TODO: update setting these values based on the facts -#openshift_version: "{{ openshift_pkg_version | default(openshift_image_tag | default(openshift.docker.openshift_image_tag | default(''))) }}" +r_openshift_master_clean_install: false diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index aed5598c0..035c15fef 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -164,6 +164,26 @@      - restart master api      - restart master controllers +- name: Configure master to use etcd3 storage backend on 3.6 clean installs +  yedit: +    src: /etc/origin/master/master-config.yaml +    key: "{{ item.key }}" +    value: "{{ item.value }}" +  with_items: +    - key: kubernetesMasterConfig.apiServerArguments.storage-backend +      value: +        - etcd3 +    - key: kubernetesMasterConfig.apiServerArguments.storage-media-type +      value: +        - application/vnd.kubernetes.protobuf +  when: +    - r_openshift_master_clean_install +    - 
openshift.common.version_gte_3_6 +  notify: +    - restart master +    - restart master api +    - restart master controllers +  - include: set_loopback_context.yml    when: openshift.common.version_gte_3_2_or_1_2 diff --git a/roles/openshift_metrics/README.md b/roles/openshift_metrics/README.md index 84503217b..1f10de4a2 100644 --- a/roles/openshift_metrics/README.md +++ b/roles/openshift_metrics/README.md @@ -68,6 +68,9 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml).  - `openshift_metrics_resolution`: How often metrics should be gathered. +- `openshift_metrics_install_hawkular_agent`: Install the Hawkular OpenShift Agent (HOSA). HOSA can be used +  to collect custom metrics from your pods. This component is currently in tech-preview and is not installed by default. +  ## Additional variables to control resource limits  Each metrics component (hawkular, cassandra, heapster) can specify a cpu and memory limits and requests by setting  the corresponding role variable: diff --git a/roles/openshift_metrics/defaults/main.yaml b/roles/openshift_metrics/defaults/main.yaml index 467db34c8..ba50566e9 100644 --- a/roles/openshift_metrics/defaults/main.yaml +++ b/roles/openshift_metrics/defaults/main.yaml @@ -31,6 +31,14 @@ openshift_metrics_heapster_requests_memory: 0.9375G  openshift_metrics_heapster_requests_cpu: null  openshift_metrics_heapster_nodeselector: "" +openshift_metrics_install_hawkular_agent: False +openshift_metrics_hawkular_agent_limits_memory: null +openshift_metrics_hawkular_agent_limits_cpu: null +openshift_metrics_hawkular_agent_requests_memory: null +openshift_metrics_hawkular_agent_requests_cpu: null +openshift_metrics_hawkular_agent_nodeselector: "" +openshift_metrics_hawkular_agent_namespace: "default" +  openshift_metrics_hawkular_hostname: "hawkular-metrics.{{openshift_master_default_subdomain}}"  openshift_metrics_duration: 7 diff --git a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml index fb4fe2f03..7b81b3c10 100644 --- a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml +++ b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml @@ -73,6 +73,8 @@          {{ hawkular_secrets['hawkular-metrics.key'] }}        tls.truststore.crt: >          {{ hawkular_secrets['hawkular-cassandra.crt'] }} +      ca.crt: > +        {{ hawkular_secrets['ca.crt'] }}    when: name not in metrics_secrets.stdout_lines    changed_when: no diff --git a/roles/openshift_metrics/tasks/install_hosa.yaml b/roles/openshift_metrics/tasks/install_hosa.yaml new file mode 100644 index 000000000..cc533a68b --- /dev/null +++ b/roles/openshift_metrics/tasks/install_hosa.yaml @@ -0,0 +1,44 @@ +--- +- name: Generate Hawkular Agent (HOSA) Cluster Role +  template: +    src: hawkular_openshift_agent_role.j2 +    dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-role.yaml" +  changed_when: no + +- name: Generate Hawkular Agent (HOSA) Service Account +  template: +    src: hawkular_openshift_agent_sa.j2 +    dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-sa.yaml" +  changed_when: no + +- name: Generate Hawkular Agent (HOSA) Daemon Set +  template: +    src: hawkular_openshift_agent_ds.j2 +    dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-ds.yaml" +  vars: +    node_selector: "{{openshift_metrics_hawkular_agent_nodeselector | default('') }}" +  changed_when: no + +- name: Generate the Hawkular Agent (HOSA) Configmap +  
template: +    src: hawkular_openshift_agent_cm.j2 +    dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-cm.yaml" +  changed_when: no + +- name: Generate role binding for the hawkular-openshift-agent service account +  template: +    src: rolebinding.j2 +    dest: "{{ mktemp.stdout }}/templates/metrics-hawkular-agent-rolebinding.yaml" +  vars: +    cluster: True +    obj_name: hawkular-openshift-agent-rb +    labels: +      metrics-infra: hawkular-agent +    roleRef: +      kind: ClusterRole +      name: hawkular-openshift-agent +    subjects: +      - kind: ServiceAccount +        name: hawkular-openshift-agent +        namespace: "{{openshift_metrics_hawkular_agent_namespace}}" +  changed_when: no diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 74eb56713..fdf4ae57f 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -16,11 +16,19 @@    include: install_heapster.yaml    when: openshift_metrics_heapster_standalone | bool -- find: paths={{ mktemp.stdout }}/templates patterns=*.yaml +- name: Install Hawkular OpenShift Agent (HOSA) +  include: install_hosa.yaml +  when: openshift_metrics_install_hawkular_agent | default(false) | bool + +- find: +    paths: "{{ mktemp.stdout }}/templates" +    patterns: "^(?!metrics-hawkular-openshift-agent).*.yaml" +    use_regex: true    register: object_def_files    changed_when: no -- slurp: src={{item.path}} +- slurp: +    src: "{{item.path}}"    register: object_defs    with_items: "{{object_def_files.files}}"    changed_when: no @@ -34,6 +42,31 @@      file_content: "{{ item.content | b64decode | from_yaml }}"    with_items: "{{ object_defs.results }}" +- find: +    paths: "{{ mktemp.stdout }}/templates" +    patterns: "^metrics-hawkular-openshift-agent.*.yaml" +    use_regex: true +  register: hawkular_agent_object_def_files +  when: openshift_metrics_install_hawkular_agent | bool +  changed_when: no + +- slurp: +    src: "{{item.path}}" +  register: hawkular_agent_object_defs +  with_items: "{{ hawkular_agent_object_def_files.files }}" +  when: openshift_metrics_install_hawkular_agent | bool +  changed_when: no + +- name: Create Hawkular Agent objects +  include: oc_apply.yaml +  vars: +    kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" +    namespace: "{{ openshift_metrics_hawkular_agent_namespace }}" +    file_name: "{{ item.source }}" +    file_content: "{{ item.content | b64decode | from_yaml }}" +  with_items: "{{ hawkular_agent_object_defs.results }}" +  when: openshift_metrics_install_hawkular_agent | bool +  - include: update_master_config.yaml  - command: > diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 5d8506a73..0b5f23c24 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -44,6 +44,9 @@  - include: "{{ (openshift_metrics_install_metrics | bool) | ternary('install_metrics.yaml','uninstall_metrics.yaml') }}" +- include: uninstall_hosa.yaml +  when: not openshift_metrics_install_hawkular_agent | bool +  - name: Delete temp directory    local_action: file path=local_tmp.stdout state=absent    tags: metrics_cleanup diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index dd67703b4..1e1af40e8 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -14,7 +14,7 @@    
command: >      {{ openshift.common.client_binary }} --config={{ kubeconfig }}      apply -f {{ file_name }} -    -n {{ openshift_metrics_project }} +    -n {{namespace}}    register: generation_apply    failed_when: "'error' in generation_apply.stderr"    changed_when: no diff --git a/roles/openshift_metrics/tasks/uninstall_hosa.yaml b/roles/openshift_metrics/tasks/uninstall_hosa.yaml new file mode 100644 index 000000000..42ed02460 --- /dev/null +++ b/roles/openshift_metrics/tasks/uninstall_hosa.yaml @@ -0,0 +1,15 @@ +--- +- name: remove Hawkular Agent (HOSA) components +  command: > +    {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig +    delete --ignore-not-found --selector=metrics-infra=agent +    all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings +  register: delete_metrics +  changed_when: delete_metrics.stdout != 'No resources found' + +- name: remove rolebindings +  command: > +    {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig +    delete --ignore-not-found +    clusterrolebinding/hawkular-openshift-agent-rb +  changed_when: delete_metrics.stdout != 'No resources found' diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 new file mode 100644 index 000000000..bf472c066 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 @@ -0,0 +1,54 @@ +id: hawkular-openshift-agent +kind: ConfigMap +apiVersion: v1 +name: Hawkular OpenShift Agent Configuration +metadata: +  name: hawkular-openshift-agent-configuration +  labels: +    metrics-infra: agent +  namespace: {{openshift_metrics_hawkular_agent_namespace}} +data: +  config.yaml: | +    kubernetes: +      tenant: ${POD:namespace_name} +    hawkular_server: +      url: https://hawkular-metrics.openshift-infra.svc.cluster.local        +      credentials: +        username: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.username +        password: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.password +      ca_cert_file: secret:openshift-infra/hawkular-metrics-certs/ca.crt +    emitter: +      status_enabled: false +    collector: +      minimum_collection_interval: 10s +      default_collection_interval: 30s +      metric_id_prefix: pod/${POD:uid}/custom/ +      tags: +        metric_name: ${METRIC:name} +        description: ${METRIC:description} +        units: ${METRIC:units} +        namespace_id: ${POD:namespace_uid} +        namespace_name: ${POD:namespace_name} +        node_name: ${POD:node_name} +        pod_id: ${POD:uid} +        pod_ip: ${POD:ip} +        pod_name: ${POD:name} +        pod_namespace: ${POD:namespace_name} +        hostname: ${POD:hostname} +        host_ip: ${POD:host_ip} +        labels: ${POD:labels} +        type: pod +        collector: hawkular_openshift_agent +        custom_metric: true +  hawkular-openshift-agent: | +    endpoints: +    - type: prometheus +      protocol: "http" +      port: 8080 +      path: /metrics +      collection_interval: 30s +      metrics: +      - name: hawkular_openshift_agent_metric_data_points_collected_total +      - name: hawkular_openshift_agent_monitored_endpoints +      - name: hawkular_openshift_agent_monitored_pods +      - name: hawkular_openshift_agent_monitored_metrics diff --git 
a/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 new file mode 100644 index 000000000..d65eaf9ae --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 @@ -0,0 +1,91 @@ +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: +  name: hawkular-openshift-agent +  labels: +    name: hawkular-openshift-agent +    metrics-infra: agent +  namespace: {{openshift_metrics_hawkular_agent_namespace}} +spec: +  selector: +    matchLabels: +      name: hawkular-openshift-agent +  template: +    metadata: +      labels: +        name: hawkular-openshift-agent +        metrics-infra: agent +    spec: +      serviceAccount: hawkular-openshift-agent +{% if node_selector is iterable and node_selector | length > 0 %} +      nodeSelector: +{% for key, value in node_selector.iteritems() %} +        {{key}}: "{{value}}" +{% endfor %} +{% endif %} +      containers: +      - image: {{openshift_metrics_image_prefix}}metrics-hawkular-openshift-agent:{{openshift_metrics_image_version}} +        imagePullPolicy: Always +        name: hawkular-openshift-agent +{% if ((openshift_metrics_hawkular_agent_limits_cpu is defined and openshift_metrics_hawkular_agent_limits_cpu is not none) +   or (openshift_metrics_hawkular_agent_limits_memory is defined and openshift_metrics_hawkular_agent_limits_memory is not none) +   or (openshift_metrics_hawkular_agent_requests_cpu is defined and openshift_metrics_hawkular_agent_requests_cpu is not none) +   or (openshift_metrics_hawkular_agent_requests_memory is defined and openshift_metrics_hawkular_agent_requests_memory is not none)) +%} +        resources: +{% if (openshift_metrics_hawkular_agent_limits_cpu is not none +   or openshift_metrics_hawkular_agent_limits_memory is not none) +%} +          limits: +{% if openshift_metrics_hawkular_agent_limits_cpu is not none %} +            cpu: "{{openshift_metrics_hawkular_agent_limits_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_limits_memory is not none %} +            memory: "{{openshift_metrics_hawkular_agent_limits_memory}}" +{% endif %} +{% endif %} +{% if (openshift_metrics_hawkular_agent_requests_cpu is not none +   or openshift_metrics_hawkular_agent_requests_memory is not none) +%} +          requests: +{% if openshift_metrics_hawkular_agent_requests_cpu is not none %} +            cpu: "{{openshift_metrics_hawkular_agent_requests_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_requests_memory is not none %} +            memory: "{{openshift_metrics_hawkular_agent_requests_memory}}" +{% endif %} +{% endif %} +{% endif %} + +        livenessProbe: +          httpGet: +            scheme: HTTP +            path: /health +            port: 8080 +          initialDelaySeconds: 30 +          periodSeconds: 30 +        command: +          - "hawkular-openshift-agent" +          - "-config" +          - "/hawkular-openshift-agent-configuration/config.yaml" +          - "-v" +          - "3" +        env: +        - name: K8S_POD_NAMESPACE +          valueFrom: +            fieldRef: +              fieldPath: metadata.namespace +        - name: K8S_POD_NAME +          valueFrom: +            fieldRef: +              fieldPath: metadata.name +        volumeMounts: +        - name: hawkular-openshift-agent-configuration +          mountPath: "/hawkular-openshift-agent-configuration" +      volumes: +      - name: hawkular-openshift-agent-configuration +        configMap: +          name: 
hawkular-openshift-agent-configuration +      - name: hawkular-openshift-agent +        configMap: +          name: hawkular-openshift-agent-configuration diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 new file mode 100644 index 000000000..24b8cd801 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ClusterRole +metadata: +  name: hawkular-openshift-agent +  labels: +    metrics-infra: agent +rules: +- apiGroups: +  - "" +  resources: +  - configmaps +  - namespaces +  - nodes +  - pods +  - projects +  verbs: +  - get +  - list +  - watch +- apiGroups: +  - "" +  resources: +  - secrets +  verbs: +  - get diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 new file mode 100644 index 000000000..ec604d73c --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: +  name: hawkular-openshift-agent +  labels: +    metrics-infra: agent +  namespace: {{openshift_metrics_hawkular_agent_namespace}} diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 023b1a9b7..8f8550e2d 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -4,7 +4,8 @@      path: /run/ostree-booted    register: ostree_booted -- block: +- when: not ostree_booted.stat.exists +  block:    - name: Ensure libselinux-python is installed      package: name=libselinux-python state=present @@ -24,41 +25,40 @@      - openshift_additional_repos | length == 0      notify: refresh cache -  # Note: OpenShift repositories under CentOS may be shipped through the -  # "centos-release-openshift-origin" package which configures the repository. -  # This task matches the file names provided by the package so that they are -  # not installed twice in different files and remains idempotent. -  - name: Configure origin gpg keys if needed -    copy: -      src: "{{ item.src }}" -      dest: "{{ item.dest }}" -    with_items: -    - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS -      dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS -    - src: origin/repos/openshift-ansible-centos-paas-sig.repo -      dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo -    notify: refresh cache -    when: -    - ansible_os_family == "RedHat" -    - ansible_distribution != "Fedora" -    - openshift_deployment_type == 'origin' -    - openshift_enable_origin_repo | default(true) | bool -    # Singleton block -  - when: r_osr_first_run | default(true) +  - when: r_openshift_repos_has_run is not defined      block: + +    # Note: OpenShift repositories under CentOS may be shipped through the +    # "centos-release-openshift-origin" package which configures the repository. +    # This task matches the file names provided by the package so that they are +    # not installed twice in different files and remains idempotent. 
+    - name: Configure origin gpg keys if needed +      copy: +        src: "{{ item.src }}" +        dest: "{{ item.dest }}" +      with_items: +      - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS +        dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS +      - src: origin/repos/openshift-ansible-centos-paas-sig.repo +        dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo +      notify: refresh cache +      when: +      - ansible_os_family == "RedHat" +      - ansible_distribution != "Fedora" +      - openshift_deployment_type == 'origin' +      - openshift_enable_origin_repo | default(true) | bool +      - name: Ensure clean repo cache in the event repos have been changed manually        debug:          msg: "First run of openshift_repos"        changed_when: true        notify: refresh cache -    - name: Set fact r_osr_first_run false +    - name: Record that openshift_repos already ran        set_fact: -        r_osr_first_run: false +        r_openshift_repos_has_run: True    # Force running ALL handlers now, because we expect repo cache to be cleared    # if changes have been made.    - meta: flush_handlers - -  when: not ostree_booted.stat.exists  | 
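
The final hunk above swaps the r_osr_first_run fact for a guard on whether r_openshift_repos_has_run is defined, so the singleton block runs only once per play. A minimal standalone sketch of that run-once pattern, using a hypothetical playbook and fact name that are not part of this PR:

# run_once_pattern.yml -- illustrative only; the fact name is made up.
- hosts: localhost
  gather_facts: false
  tasks:
    # The block is skipped on any later pass because the fact only
    # exists after the first pass has set it.
    - when: r_example_has_run is not defined
      block:
        - name: Do first-run-only work
          debug:
            msg: "First run"
          changed_when: true

        - name: Record that the block already ran
          set_fact:
            r_example_has_run: True

Guarding on "is not defined" (rather than defaulting a boolean fact to true, as the old r_osr_first_run code did) avoids having to seed an initial value at all: the absence of the fact is itself the "first run" signal.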
