From 921e73471674c2e4bc41722c61cbb51e8a2cc138 Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Thu, 28 May 2026 18:23:47 +0530 Subject: [PATCH 01/12] Increase verification pod health check timeout for OCP 4.22+ Co-authored-by: Cursor --- collection/stages/roles/verification/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collection/stages/roles/verification/tasks/main.yml b/collection/stages/roles/verification/tasks/main.yml index ed7d1930..60821e6a 100644 --- a/collection/stages/roles/verification/tasks/main.yml +++ b/collection/stages/roles/verification/tasks/main.yml @@ -69,8 +69,8 @@ name: tools_cluster_checks tasks_from: check_pods.yml vars: - wait_retries: 20 - wait_delay: 10 + wait_retries: 30 + wait_delay: 30 - name: Check cluster alerts ansible.builtin.include_role: From 3a815f902c60376261008bec4ba75b33a1876d64 Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Wed, 20 May 2026 13:58:37 +0530 Subject: [PATCH 02/12] Add CPMS e2e test stage role and cpms_replace_attrs day2ops procedure --- .../stages/roles/cpms_test/defaults/main.yml | 9 + .../stages/roles/cpms_test/meta/main.yml | 3 + .../stages/roles/cpms_test/tasks/main.yml | 55 +++++ .../roles/cpms_test/tasks/run_cpms_test.yml | 83 +++++++ .../tasks/procedures/cpms_replace_attrs.yml | 228 ++++++++++++++++++ .../control-plane-machine-set-patch.yaml.j2 | 19 ++ jobs_definitions/4.17_ovnkubernetes_ipi.yaml | 8 + jobs_definitions/osp_verification.yaml | 1 + playbooks/ocp_testing.yaml | 4 + playbooks/plays/cpms_test.yaml | 17 ++ 10 files changed, 427 insertions(+) create mode 100644 collection/stages/roles/cpms_test/defaults/main.yml create mode 100644 collection/stages/roles/cpms_test/meta/main.yml create mode 100644 collection/stages/roles/cpms_test/tasks/main.yml create mode 100644 collection/stages/roles/cpms_test/tasks/run_cpms_test.yml create mode 100644 collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml create mode 100644 collection/stages/roles/day2ops/templates/control-plane-machine-set-patch.yaml.j2 create mode 100644 playbooks/plays/cpms_test.yaml diff --git a/collection/stages/roles/cpms_test/defaults/main.yml b/collection/stages/roles/cpms_test/defaults/main.yml new file mode 100644 index 00000000..4edfe241 --- /dev/null +++ b/collection/stages/roles/cpms_test/defaults/main.yml @@ -0,0 +1,9 @@ +--- +# defaults file for cpms_test +cpms_test_name: cluster-control-plane-machine-set-operator +cpms_test_repo_url: "https://github.com/openshift/{{ cpms_test_name }}.git" +cpms_test_dir: "{{ artifacts_dir }}/{{ cpms_test_name }}" +cpms_test_results_dir: "{{ artifacts_dir }}/cpms_test-results" +cpms_test_testsuite_name: cpms +cpms_tests_go_version: "{{ tests.default_go_version_target }}" +cpms_test_junit_filename: junit_control_plane_machine_set_operator.xml diff --git a/collection/stages/roles/cpms_test/meta/main.yml b/collection/stages/roles/cpms_test/meta/main.yml new file mode 100644 index 00000000..ce0638dc --- /dev/null +++ b/collection/stages/roles/cpms_test/meta/main.yml @@ -0,0 +1,3 @@ +--- +collections: + - shiftstack.tools diff --git a/collection/stages/roles/cpms_test/tasks/main.yml b/collection/stages/roles/cpms_test/tasks/main.yml new file mode 100644 index 00000000..336d41fc --- /dev/null +++ b/collection/stages/roles/cpms_test/tasks/main.yml @@ -0,0 +1,55 @@ +--- +# tasks file for cpms_test +- name: Install GO + ansible.builtin.include_role: + name: tools_openshift_tests + tasks_from: install_go.yml + +- name: Check if release branch exists for the CPMS operator repo + ansible.builtin.uri: + url: "https://github.com/openshift/{{ cpms_test_name }}/tree/release-{{ discovered_openshift_release }}" + register: cpms_branch_check + failed_when: false + +- name: Set the branch to clone based on availability + ansible.builtin.set_fact: + cpms_test_branch: >- + {% if cpms_branch_check.status == 200 %}release-{{ discovered_openshift_release }}{% else %}main{% endif %} + +- name: Clone openshift/{{ cpms_test_name }} repository at branch {{ cpms_test_branch }} + ansible.builtin.git: + repo: "{{ cpms_test_repo_url }}" + version: "{{ cpms_test_branch | trim }}" + dest: "{{ cpms_test_dir }}" + force: yes + depth: 1 + retries: 3 + delay: 5 + register: cpms_repo + until: cpms_repo is succeeded + +- name: Remove the test results directory if it exists + ansible.builtin.file: + path: "{{ cpms_test_results_dir }}" + state: absent + +- name: Create the test results directory + ansible.builtin.file: + path: "{{ cpms_test_results_dir }}" + state: directory + mode: u=rwx,g=rw,o=r + +- name: Run CPMS e2e-presubmit tests + ansible.builtin.include_tasks: run_cpms_test.yml + vars: + cpms_tests_type: e2e-presubmit + +- name: Run CPMS e2e-periodic tests + ansible.builtin.include_tasks: run_cpms_test.yml + vars: + cpms_tests_type: e2e-periodic + +- name: Remove the source directory after tests complete + ansible.builtin.file: + path: "{{ cpms_test_dir }}" + state: absent diff --git a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml new file mode 100644 index 00000000..49d72187 --- /dev/null +++ b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml @@ -0,0 +1,83 @@ +--- +- name: Set result paths for {{ cpms_tests_type }} + ansible.builtin.set_fact: + cpms_results_subdir: "{{ cpms_test_results_dir }}/{{ cpms_tests_type }}" + cpms_xml_file: "{{ cpms_test_results_dir }}/{{ cpms_tests_type }}/{{ cpms_test_junit_filename }}" + +- name: Create results directory for {{ cpms_tests_type }} + ansible.builtin.file: + path: "{{ cpms_results_subdir }}" + state: directory + mode: u=rwx,g=rw,o=r + +- name: Run {{ cpms_test_name }} {{ cpms_tests_type }} tests + block: + - name: Run make {{ cpms_tests_type }} + ansible.builtin.shell: | + source {{ home_dir }}/.bashrc + make {{ cpms_tests_type }} + args: + chdir: "{{ cpms_test_dir }}" + environment: + KUBECONFIG: "{{ kubeconfig }}" + OS_CLOUD: "{{ user_cloud }}" + OPENSHIFT_CI: "true" + ARTIFACT_DIR: "{{ cpms_results_subdir }}" + changed_when: true + register: cpms_test_output + + rescue: + - name: Mark the CPMS {{ cpms_tests_type }} tests as UNSTABLE + ansible.builtin.include_role: + name: tools_stage_results + tasks_from: mark_stage_unstable.yml + vars: + unstable_msg: >- + The {{ cpms_test_name }} {{ cpms_tests_type }} test suite failed. + + - name: Run must-gather + ansible.builtin.include_role: + name: tools_must-gather + vars: + must_gather_suffix: "cpms-{{ cpms_tests_type }}" + +- name: Post-process CPMS {{ cpms_tests_type }} results + when: cpms_xml_file is file + block: + - name: Rename JUnit XML for post-processing + ansible.builtin.copy: + src: "{{ cpms_xml_file }}" + dest: "{{ cpms_results_subdir }}/tmp.xml" + remote_src: yes + mode: u=rw,g=rw,o=r + + - name: Modify XML report tags for reporting + ansible.builtin.script: | + ../scripts/modifyE2ETags.py {{ cpms_results_subdir }}/tmp.xml {{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.xml \ + cpms {{ cpms_test_testsuite_name }}_{{ cpms_tests_type }} \ + > {{ cpms_results_subdir }}/report_processing.log + + - name: Convert test results to HTML + ansible.builtin.shell: | + junit2html {{ cpms_results_subdir }}/tmp.xml {{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.html + changed_when: true + + - name: Create report directory for {{ cpms_tests_type }} + ansible.builtin.file: + path: "{{ report_dir }}/{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}" + mode: u=rwx,g=rw,o=r + state: directory + + - name: Copy XML to report directory + ansible.builtin.copy: + src: "{{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.xml" + dest: "{{ report_dir }}/{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.xml" + remote_src: yes + mode: u=rwx,g=rwx,o=rwx + + - name: Copy HTML to report directory + ansible.builtin.copy: + src: "{{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.html" + dest: "{{ report_dir }}/{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.html" + remote_src: yes + mode: u=rwx,g=rwx,o=rwx diff --git a/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml b/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml new file mode 100644 index 00000000..95d0812c --- /dev/null +++ b/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml @@ -0,0 +1,228 @@ +--- +# Procedure that updates the CPMS object by adding a fake extra network/subnet and SG to the +# masters and replacing failureDomain params on master-0 to have the same attributes as master-1. +# After reconciliation, master-0 and master-1 will have same nova AZ, cinderAZ, and volumeType. +- name: Set log directory for cpms_replace_attrs procedure + ansible.builtin.set_fact: + cpms_log_directory: "{{ artifacts_dir }}/cpms_replace_attrs" + +- name: Run cpms_replace_attrs procedure + block: + - name: Create {{ cpms_log_directory }} directory + ansible.builtin.file: + path: "{{ cpms_log_directory }}" + state: directory + mode: u=rwx,g=rw,o=r + + - name: Set artifact paths + ansible.builtin.set_fact: + cpms_orig_path: "{{ cpms_log_directory }}/orig_cpms.yaml" + cpms_patch_to_apply_path: "{{ cpms_log_directory }}/cpms_patch_to_apply.yaml" + cpms_applied_path: "{{ cpms_log_directory }}/applied_cpms.yaml" + cpms_patch_to_restore_path: "{{ cpms_log_directory }}/cpms_patch_to_restore_NOTAPPLIED.yaml" + + - name: Get original CPMS definition + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: machine.openshift.io/v1 + kind: ControlPlaneMachineSet + namespace: openshift-machine-api + register: original_cpms + + - name: Save the original CPMS definition + ansible.builtin.copy: + content: "{{ original_cpms.resources[0] | to_nice_yaml(indent=2) }}" + dest: "{{ cpms_orig_path }}" + mode: u=rw,g=rw,o=r + + - name: Create network {{ cpms_replacements.network_name }} + openstack.cloud.network: + cloud: "{{ user_cloud }}" + name: "{{ cpms_replacements.network_name }}" + state: present + register: network_to_add + + - name: Create subnet {{ cpms_replacements.subnet_name }} + openstack.cloud.subnet: + cloud: "{{ user_cloud }}" + name: "{{ cpms_replacements.subnet_name }}" + cidr: "{{ cpms_replacements.cidr }}" + network_name: "{{ cpms_replacements.network_name }}" + enable_dhcp: false + state: present + + - name: Create security group {{ cpms_replacements.sg_name }} + openstack.cloud.security_group: + cloud: "{{ user_cloud }}" + name: "{{ cpms_replacements.sg_name }}" + state: present + register: sg_to_add + + - name: Store original CPMS attributes that will be updated + ansible.builtin.set_fact: + orig_failuredomains: >- + {{ original_cpms.resources[0].spec.template.machines_v1beta1_machine_openshift_io.failureDomains.openstack | list }} + orig_networks: >- + {{ original_cpms.resources[0].spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.networks | list }} + orig_sgs: >- + {{ original_cpms.resources[0].spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.securityGroups | list }} + + - name: Create patch for CPMS + ansible.builtin.template: + src: control-plane-machine-set-patch.yaml.j2 + dest: "{{ cpms_patch_to_apply_path }}" + mode: u=rw,g=rw,o=r + vars: + cpms_failuredomains: >- + {{ orig_failuredomains | + replace(orig_failuredomains[0].availabilityZone, orig_failuredomains[1].availabilityZone) | + replace(orig_failuredomains[0].rootVolume.availabilityZone, orig_failuredomains[1].rootVolume.availabilityZone) | + replace(orig_failuredomains[0].rootVolume.volumeType, orig_failuredomains[1].rootVolume.volumeType) }} + cpms_networks: "{{ orig_networks + [{'filter': {}, 'uuid': network_to_add.id}] }}" + cpms_sgs: "{{ orig_sgs + [{'filter': {}, 'name': cpms_replacements.sg_name}] }}" + + - name: Apply patch for CPMS + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + apply: true + src: "{{ cpms_patch_to_apply_path }}" + + - name: Wait for CPMS reconciliation to complete + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_cpms_updated.yml + vars: + wait_retries: 180 + wait_delay: 60 + + - name: Check cluster health after CPMS patch application + block: + - name: Check the Control Plane MachineSet is healthy + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: check_controlplane_machinesets.yml + + - name: Wait until OCP cluster is healthy + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_is_healthy.yml + + - name: Get resulting CPMS definition after application + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: machine.openshift.io/v1 + kind: ControlPlaneMachineSet + namespace: openshift-machine-api + register: applied_cpms + + - name: Save the resulting CPMS after application + ansible.builtin.copy: + content: "{{ applied_cpms.resources[0] | to_nice_yaml(indent=2) }}" + dest: "{{ cpms_applied_path }}" + mode: u=rw,g=rw,o=r + + - name: Get all the master VMs after procedure + openstack.cloud.server_info: + cloud: "{{ user_cloud }}" + server: "*master*" + register: result + + - name: Store the master VMs after procedure + ansible.builtin.set_fact: + master_after: "{{ result.servers | sort(attribute='name') | list }}" + + - name: Get root volumes info + ansible.builtin.shell: > + openstack volume show {{ item }} -c type -c availability_zone -f shell + environment: + OS_CLOUD: "{{ user_cloud }}" + register: root_volumes_after + changed_when: false + with_items: + - "{{ master_after[0].volumes[0].id }}" + - "{{ master_after[1].volumes[0].id }}" + - "{{ master_after[2].volumes[0].id }}" + + - name: Confirm that master VMs reflect FailureDomain changes after procedure + ansible.builtin.assert: + that: + - master_after | length == 3 + - master_after | selectattr('location.zone', 'eq', expected_fd_az) | list | length == 2 + - root_volumes_after.results | selectattr('stdout', 'eq', + 'availability_zone="' + expected_fd_cinderaz + '"\ntype="' + expected_fd_volumeType + '"') | list | length == 2 + vars: + expected_fd_az: "{{ orig_failuredomains[1].availabilityZone }}" + expected_fd_cinderaz: "{{ orig_failuredomains[1].rootVolume.availabilityZone }}" + expected_fd_volumeType: "{{ orig_failuredomains[1].rootVolume.volumeType }}" + + - name: Confirm that the VMs include the added network and SG + ansible.builtin.assert: + that: + - cpms_replacements.network_name in item.addresses.keys() + - cpms_replacements.sg_name in item.security_groups | json_query('[*].name') + with_items: "{{ master_after }}" + + rescue: + - name: Run must-gather after cpms_replace_attrs failure + ansible.builtin.include_role: + name: tools_must-gather + vars: + must_gather_suffix: "cpms-replace-attrs" + + - name: Fail inside rescue block + ansible.builtin.fail: + msg: "The cpms_replace_attrs procedure failed" + + always: + - name: Create restore patch for CPMS + ansible.builtin.template: + src: control-plane-machine-set-patch.yaml.j2 + dest: "{{ cpms_patch_to_restore_path }}" + mode: u=rw,g=rw,o=r + vars: + cpms_failuredomains: "{{ orig_failuredomains }}" + cpms_networks: "{{ orig_networks }}" + cpms_sgs: "{{ orig_sgs }}" + + - name: Apply restore patch for CPMS + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + apply: true + src: "{{ cpms_patch_to_restore_path }}" + + - name: Wait for CPMS restore reconciliation to complete + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_cpms_updated.yml + vars: + wait_retries: 180 + wait_delay: 60 + + - name: Check cluster health after restore + block: + - name: Check the Control Plane MachineSet is healthy + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: check_controlplane_machinesets.yml + + - name: Wait until OCP cluster is healthy + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_is_healthy.yml + + - name: Clean up test OpenStack resources + block: + - name: Delete test network + openstack.cloud.network: + cloud: "{{ user_cloud }}" + name: "{{ cpms_replacements.network_name }}" + state: absent + + - name: Delete test security group + openstack.cloud.security_group: + cloud: "{{ user_cloud }}" + name: "{{ cpms_replacements.sg_name }}" + state: absent + ignore_errors: true diff --git a/collection/stages/roles/day2ops/templates/control-plane-machine-set-patch.yaml.j2 b/collection/stages/roles/day2ops/templates/control-plane-machine-set-patch.yaml.j2 new file mode 100644 index 00000000..0082d7da --- /dev/null +++ b/collection/stages/roles/day2ops/templates/control-plane-machine-set-patch.yaml.j2 @@ -0,0 +1,19 @@ +apiVersion: machine.openshift.io/v1 +metadata: + name: cluster + namespace: openshift-machine-api +kind: ControlPlaneMachineSet +spec: + template: + machines_v1beta1_machine_openshift_io: + failureDomains: + openstack: + {{cpms_failuredomains| to_nice_yaml| indent(8, false)}} + spec: + providerSpec: + value: + networks: + {{cpms_networks| to_nice_yaml| indent(12, false)}} + securityGroups: + {{cpms_sgs| to_nice_yaml| indent(12, false)}} +status: {} diff --git a/jobs_definitions/4.17_ovnkubernetes_ipi.yaml b/jobs_definitions/4.17_ovnkubernetes_ipi.yaml index c722992f..d948af1a 100644 --- a/jobs_definitions/4.17_ovnkubernetes_ipi.yaml +++ b/jobs_definitions/4.17_ovnkubernetes_ipi.yaml @@ -10,6 +10,7 @@ stages: - post - verification - day2ops + - cpms_test - openstack_test - conformance_test - cinder_csi_tests @@ -17,6 +18,13 @@ stages: day2ops_procedures: - moving-etcd-to-ephemeral + - cpms_replace_attrs + +cpms_replacements: + network_name: cpms-test-network + subnet_name: cpms-test-subnet + cidr: 192.168.240.0/24 + sg_name: cpms-test-sg ocp_deployment_topology: network_type: OVNKubernetes diff --git a/jobs_definitions/osp_verification.yaml b/jobs_definitions/osp_verification.yaml index 98d341b6..a4912650 100644 --- a/jobs_definitions/osp_verification.yaml +++ b/jobs_definitions/osp_verification.yaml @@ -19,6 +19,7 @@ stages: - install - post - verification + - cpms_test - openstack_test - lb_tests diff --git a/playbooks/ocp_testing.yaml b/playbooks/ocp_testing.yaml index ac386333..71715bbc 100644 --- a/playbooks/ocp_testing.yaml +++ b/playbooks/ocp_testing.yaml @@ -96,6 +96,10 @@ ansible.builtin.import_playbook: plays/conformance_test.yaml when: "'conformance_test' in stages" +- name: Run CPMS e2e tests on OpenShift + ansible.builtin.import_playbook: plays/cpms_test.yaml + when: "'cpms_test' in stages" + - name: Run Openshift Cinder CSI Tests on OpenShift ansible.builtin.import_playbook: plays/cinder_csi.yaml when: "'cinder_csi_tests' in stages" diff --git a/playbooks/plays/cpms_test.yaml b/playbooks/plays/cpms_test.yaml new file mode 100644 index 00000000..1c613a3f --- /dev/null +++ b/playbooks/plays/cpms_test.yaml @@ -0,0 +1,17 @@ +--- +- name: Run CPMS e2e tests on OpenShift + hosts: installer + gather_facts: no + vars_files: + - "../../configs/global.yml" + tasks: + - name: Main block + block: + - name: Run CPMS e2e tests on OCP + ansible.builtin.include_role: + name: shiftstack.stages.cpms_test + always: + - name: Synchronize artifacts from the Ansible Managed Node to Ansible Controller + ansible.builtin.include_role: + name: shiftstack.tools.tools_ansible_inventory + tasks_from: sync_artifacts.yml From 34cd493f10d01f4b65f8c955a34689a7c1718f73 Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Wed, 20 May 2026 14:46:45 +0530 Subject: [PATCH 03/12] Simplify cpms_test role to use shared prepare/post tasks and remove branch fallback --- .../stages/roles/cpms_test/defaults/main.yml | 1 - .../stages/roles/cpms_test/tasks/main.yml | 42 ++----------- .../roles/cpms_test/tasks/run_cpms_test.yml | 60 ++++++------------- 3 files changed, 23 insertions(+), 80 deletions(-) diff --git a/collection/stages/roles/cpms_test/defaults/main.yml b/collection/stages/roles/cpms_test/defaults/main.yml index 4edfe241..b9bc650e 100644 --- a/collection/stages/roles/cpms_test/defaults/main.yml +++ b/collection/stages/roles/cpms_test/defaults/main.yml @@ -1,7 +1,6 @@ --- # defaults file for cpms_test cpms_test_name: cluster-control-plane-machine-set-operator -cpms_test_repo_url: "https://github.com/openshift/{{ cpms_test_name }}.git" cpms_test_dir: "{{ artifacts_dir }}/{{ cpms_test_name }}" cpms_test_results_dir: "{{ artifacts_dir }}/cpms_test-results" cpms_test_testsuite_name: cpms diff --git a/collection/stages/roles/cpms_test/tasks/main.yml b/collection/stages/roles/cpms_test/tasks/main.yml index 336d41fc..0fedc0ba 100644 --- a/collection/stages/roles/cpms_test/tasks/main.yml +++ b/collection/stages/roles/cpms_test/tasks/main.yml @@ -1,43 +1,13 @@ --- # tasks file for cpms_test -- name: Install GO +- name: Prepare CPMS test ansible.builtin.include_role: name: tools_openshift_tests - tasks_from: install_go.yml - -- name: Check if release branch exists for the CPMS operator repo - ansible.builtin.uri: - url: "https://github.com/openshift/{{ cpms_test_name }}/tree/release-{{ discovered_openshift_release }}" - register: cpms_branch_check - failed_when: false - -- name: Set the branch to clone based on availability - ansible.builtin.set_fact: - cpms_test_branch: >- - {% if cpms_branch_check.status == 200 %}release-{{ discovered_openshift_release }}{% else %}main{% endif %} - -- name: Clone openshift/{{ cpms_test_name }} repository at branch {{ cpms_test_branch }} - ansible.builtin.git: - repo: "{{ cpms_test_repo_url }}" - version: "{{ cpms_test_branch | trim }}" - dest: "{{ cpms_test_dir }}" - force: yes - depth: 1 - retries: 3 - delay: 5 - register: cpms_repo - until: cpms_repo is succeeded - -- name: Remove the test results directory if it exists - ansible.builtin.file: - path: "{{ cpms_test_results_dir }}" - state: absent - -- name: Create the test results directory - ansible.builtin.file: - path: "{{ cpms_test_results_dir }}" - state: directory - mode: u=rwx,g=rw,o=r + tasks_from: prepare_openshift_tests.yml + vars: + repo_name: "{{ cpms_test_name }}" + results_dir: "{{ cpms_test_results_dir }}" + go_version_target: "{{ cpms_tests_go_version }}" - name: Run CPMS e2e-presubmit tests ansible.builtin.include_tasks: run_cpms_test.yml diff --git a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml index 49d72187..062736b1 100644 --- a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml +++ b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml @@ -2,7 +2,6 @@ - name: Set result paths for {{ cpms_tests_type }} ansible.builtin.set_fact: cpms_results_subdir: "{{ cpms_test_results_dir }}/{{ cpms_tests_type }}" - cpms_xml_file: "{{ cpms_test_results_dir }}/{{ cpms_tests_type }}/{{ cpms_test_junit_filename }}" - name: Create results directory for {{ cpms_tests_type }} ansible.builtin.file: @@ -24,7 +23,23 @@ OPENSHIFT_CI: "true" ARTIFACT_DIR: "{{ cpms_results_subdir }}" changed_when: true - register: cpms_test_output + + - name: Rename JUnit XML to match expected prefix + ansible.builtin.copy: + src: "{{ cpms_results_subdir }}/{{ cpms_test_junit_filename }}" + dest: "{{ cpms_results_subdir }}/junit_e2e_{{ cpms_tests_type }}.xml" + remote_src: yes + mode: u=rw,g=rw,o=r + + - name: Post openshift-test + ansible.builtin.include_role: + name: tools_openshift_tests + tasks_from: post_openshift_tests.yml + vars: + testsuite_name: "{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}" + key_for_filtering_results: "cpms" + test_name: "{{ cpms_test_name }}-{{ cpms_tests_type }}" + results_dir: "{{ cpms_results_subdir }}" rescue: - name: Mark the CPMS {{ cpms_tests_type }} tests as UNSTABLE @@ -40,44 +55,3 @@ name: tools_must-gather vars: must_gather_suffix: "cpms-{{ cpms_tests_type }}" - -- name: Post-process CPMS {{ cpms_tests_type }} results - when: cpms_xml_file is file - block: - - name: Rename JUnit XML for post-processing - ansible.builtin.copy: - src: "{{ cpms_xml_file }}" - dest: "{{ cpms_results_subdir }}/tmp.xml" - remote_src: yes - mode: u=rw,g=rw,o=r - - - name: Modify XML report tags for reporting - ansible.builtin.script: | - ../scripts/modifyE2ETags.py {{ cpms_results_subdir }}/tmp.xml {{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.xml \ - cpms {{ cpms_test_testsuite_name }}_{{ cpms_tests_type }} \ - > {{ cpms_results_subdir }}/report_processing.log - - - name: Convert test results to HTML - ansible.builtin.shell: | - junit2html {{ cpms_results_subdir }}/tmp.xml {{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.html - changed_when: true - - - name: Create report directory for {{ cpms_tests_type }} - ansible.builtin.file: - path: "{{ report_dir }}/{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}" - mode: u=rwx,g=rw,o=r - state: directory - - - name: Copy XML to report directory - ansible.builtin.copy: - src: "{{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.xml" - dest: "{{ report_dir }}/{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.xml" - remote_src: yes - mode: u=rwx,g=rwx,o=rwx - - - name: Copy HTML to report directory - ansible.builtin.copy: - src: "{{ cpms_results_subdir }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.html" - dest: "{{ report_dir }}/{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}/{{ cpms_test_name }}-{{ cpms_tests_type }}.html" - remote_src: yes - mode: u=rwx,g=rwx,o=rwx From e8657d99c01beec6c40654c9b23f9a20ff07336b Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Mon, 25 May 2026 16:58:15 +0530 Subject: [PATCH 04/12] Address review: remove cpms_test from Jenkins job and drop redundant inner rescue Co-authored-by: Cursor --- .../day2ops/tasks/procedures/cpms_replace_attrs.yml | 11 ----------- jobs_definitions/4.17_ovnkubernetes_ipi.yaml | 8 -------- 2 files changed, 19 deletions(-) diff --git a/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml b/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml index 95d0812c..4bdf2a46 100644 --- a/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml +++ b/collection/stages/roles/day2ops/tasks/procedures/cpms_replace_attrs.yml @@ -163,17 +163,6 @@ - cpms_replacements.sg_name in item.security_groups | json_query('[*].name') with_items: "{{ master_after }}" - rescue: - - name: Run must-gather after cpms_replace_attrs failure - ansible.builtin.include_role: - name: tools_must-gather - vars: - must_gather_suffix: "cpms-replace-attrs" - - - name: Fail inside rescue block - ansible.builtin.fail: - msg: "The cpms_replace_attrs procedure failed" - always: - name: Create restore patch for CPMS ansible.builtin.template: diff --git a/jobs_definitions/4.17_ovnkubernetes_ipi.yaml b/jobs_definitions/4.17_ovnkubernetes_ipi.yaml index d948af1a..c722992f 100644 --- a/jobs_definitions/4.17_ovnkubernetes_ipi.yaml +++ b/jobs_definitions/4.17_ovnkubernetes_ipi.yaml @@ -10,7 +10,6 @@ stages: - post - verification - day2ops - - cpms_test - openstack_test - conformance_test - cinder_csi_tests @@ -18,13 +17,6 @@ stages: day2ops_procedures: - moving-etcd-to-ephemeral - - cpms_replace_attrs - -cpms_replacements: - network_name: cpms-test-network - subnet_name: cpms-test-subnet - cidr: 192.168.240.0/24 - sg_name: cpms-test-sg ocp_deployment_topology: network_type: OVNKubernetes From cce04e6017475172eec71cc5070b818a29f7bd01 Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Thu, 28 May 2026 19:27:56 +0530 Subject: [PATCH 05/12] Move post-processing outside block/rescue so results are reported even on test failure --- .../roles/cpms_test/tasks/run_cpms_test.yml | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml index 062736b1..e144f33f 100644 --- a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml +++ b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml @@ -24,23 +24,6 @@ ARTIFACT_DIR: "{{ cpms_results_subdir }}" changed_when: true - - name: Rename JUnit XML to match expected prefix - ansible.builtin.copy: - src: "{{ cpms_results_subdir }}/{{ cpms_test_junit_filename }}" - dest: "{{ cpms_results_subdir }}/junit_e2e_{{ cpms_tests_type }}.xml" - remote_src: yes - mode: u=rw,g=rw,o=r - - - name: Post openshift-test - ansible.builtin.include_role: - name: tools_openshift_tests - tasks_from: post_openshift_tests.yml - vars: - testsuite_name: "{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}" - key_for_filtering_results: "cpms" - test_name: "{{ cpms_test_name }}-{{ cpms_tests_type }}" - results_dir: "{{ cpms_results_subdir }}" - rescue: - name: Mark the CPMS {{ cpms_tests_type }} tests as UNSTABLE ansible.builtin.include_role: @@ -55,3 +38,22 @@ name: tools_must-gather vars: must_gather_suffix: "cpms-{{ cpms_tests_type }}" + +- name: Rename JUnit XML to match expected prefix + ansible.builtin.copy: + src: "{{ cpms_results_subdir }}/{{ cpms_test_junit_filename }}" + dest: "{{ cpms_results_subdir }}/junit_e2e_{{ cpms_tests_type }}.xml" + remote_src: yes + mode: u=rw,g=rw,o=r + when: cpms_results_subdir + '/' + cpms_test_junit_filename is file + +- name: Post openshift-test + ansible.builtin.include_role: + name: tools_openshift_tests + tasks_from: post_openshift_tests.yml + vars: + testsuite_name: "{{ cpms_test_testsuite_name }}_{{ cpms_tests_type }}" + key_for_filtering_results: "cpms" + test_name: "{{ cpms_test_name }}-{{ cpms_tests_type }}" + results_dir: "{{ cpms_results_subdir }}" + when: cpms_results_subdir + '/' + cpms_test_junit_filename is file From da7c3dfa9ecc82a65c501e599c0dfb9e17e3d26a Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Thu, 4 Jun 2026 10:53:24 +0530 Subject: [PATCH 06/12] Fix Jinja2 operator precedence in when condition for JUnit file check Co-authored-by: Cursor --- collection/stages/roles/cpms_test/tasks/run_cpms_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml index e144f33f..1fee2206 100644 --- a/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml +++ b/collection/stages/roles/cpms_test/tasks/run_cpms_test.yml @@ -45,7 +45,7 @@ dest: "{{ cpms_results_subdir }}/junit_e2e_{{ cpms_tests_type }}.xml" remote_src: yes mode: u=rw,g=rw,o=r - when: cpms_results_subdir + '/' + cpms_test_junit_filename is file + when: (cpms_results_subdir + '/' + cpms_test_junit_filename) is file - name: Post openshift-test ansible.builtin.include_role: @@ -56,4 +56,4 @@ key_for_filtering_results: "cpms" test_name: "{{ cpms_test_name }}-{{ cpms_tests_type }}" results_dir: "{{ cpms_results_subdir }}" - when: cpms_results_subdir + '/' + cpms_test_junit_filename is file + when: (cpms_results_subdir + '/' + cpms_test_junit_filename) is file From 581c970813a0e24b18d9400797505ef94070d72c Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Wed, 17 Jun 2026 18:06:43 +0530 Subject: [PATCH 07/12] Increase ShiftStack master RAM to 24GB to reduce bootstrap timeout race Co-authored-by: Cursor --- jobs_definitions/osp_verification.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobs_definitions/osp_verification.yaml b/jobs_definitions/osp_verification.yaml index 98d341b6..aa07ad3d 100644 --- a/jobs_definitions/osp_verification.yaml +++ b/jobs_definitions/osp_verification.yaml @@ -38,7 +38,7 @@ ocp_deployment_topology: flavors: master: name: "master" - ram: 16384 + ram: 24576 vcpus: 4 disk: 30 worker: From bda356e1458db6b0c34794cb1f1c3882037c11b6 Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Thu, 18 Jun 2026 12:49:10 +0530 Subject: [PATCH 08/12] Use oc adm release extract --tools instead of unreliable file-cache for OCP binaries Co-authored-by: Cursor --- .../defaults/main.yml | 1 - .../tasks/get_openshift_release_binaries.yml | 71 ++++++++++++------- .../get_openshift_release_build_name.yml | 28 +++++++- 3 files changed, 70 insertions(+), 30 deletions(-) diff --git a/collection/tools/roles/tools_get_openshift_release/defaults/main.yml b/collection/tools/roles/tools_get_openshift_release/defaults/main.yml index f54042ac..64f604c6 100644 --- a/collection/tools/roles/tools_get_openshift_release/defaults/main.yml +++ b/collection/tools/roles/tools_get_openshift_release/defaults/main.yml @@ -2,6 +2,5 @@ # defaults file for tools_get_openshift_release openshift_releasestream_url: "https://openshift-release.apps.ci.l2s4.p1.openshiftapps.com/api/v1/releasestream" release_name: "{{ openshift_release_build_name | default('') }}" -openshift_download_url: "{{ 'https://openshift-release-artifacts.apps.ci.l2s4.p1.openshiftapps.com' + '/' + release_name }}" openshift_mirror_url: "https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp" ocp_build_info_file: "{{ controller_home_dir }}/latest_build.json" diff --git a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml index a2515771..ce4dc41f 100644 --- a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml +++ b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml @@ -1,25 +1,33 @@ --- +# Extract OCP installer and/or client binaries directly from the release image +# using `oc adm release extract --tools` instead of the release-controller's +# file-cache (openshift-release-artifacts), which has no SLA and can get stuck +# indefinitely during tool extraction. - name: Get the OCP installer and/or client binaries vars: - installer_url: "{{ openshift_download_url }}/openshift-install-linux-{{ release_name }}.tar.gz" - client_url: "{{ openshift_download_url }}/openshift-client-linux-{{ release_name }}.tar.gz" + installer_tarball: "openshift-install-linux-{{ release_name }}.tar.gz" + client_tarball: "openshift-client-linux-{{ release_name }}.tar.gz" + pull_secret_file: "{{ home_dir }}/pull-secret.json" block: - name: Fail if release_name var is not defined ansible.builtin.fail: msg: "'release_name' variable must be defined and cannot be empty" when: release_name == '' - - name: Wait for content to come up on {{ openshift_download_url }} - ansible.builtin.uri: - url: "{{ openshift_download_url }}" - method: GET - return_content: yes - status_code: 200 - body_format: json - register: result - until: result.content.find("openshift-install-linux") != -1 - retries: 20 - delay: 60 + - name: Fail if openshift_release_pull_spec is not defined + ansible.builtin.fail: + msg: "'openshift_release_pull_spec' must be set by get_openshift_release_build_name.yml" + when: openshift_release_pull_spec is not defined or openshift_release_pull_spec == '' + + - name: Extract pull secret from host cluster + ansible.builtin.shell: >- + set -o pipefail && + oc get secret pull-secret -n openshift-config + --kubeconfig={{ rhoso_kubeconfig }} + -o jsonpath='{.data.\.dockerconfigjson}' + | base64 -d > {{ pull_secret_file }} + changed_when: true + no_log: true - name: Create the installer directory ansible.builtin.file: @@ -27,18 +35,27 @@ state: directory mode: u=rwx,g=rw,o=r + - name: Extract OCP tools from release image {{ openshift_release_pull_spec }} + ansible.builtin.command: + cmd: >- + oc adm release extract + --tools + --registry-config={{ pull_secret_file }} + --to={{ home_dir }}/{{ release_name }} + {{ openshift_release_pull_spec }} + register: extract_result + until: extract_result is not failed + retries: 3 + delay: 30 + - name: Get the installer binary and create a symlink when: "'installer' in binaries" block: - - name: Download and unarchive the installer from {{ installer_url }} + - name: Unarchive the installer from {{ installer_tarball }} ansible.builtin.unarchive: - src: "{{ installer_url }}" + src: "{{ home_dir }}/{{ release_name }}/{{ installer_tarball }}" dest: "{{ home_dir }}/{{ release_name }}" remote_src: yes - register: result - until: result is not failed - retries: 3 - delay: 10 - name: Create a symlink to the openshift-install binary from /usr/local/bin ansible.builtin.file: @@ -47,18 +64,14 @@ state: link become: true - - name: Get the installer binary and create symlinks + - name: Get the client binary and create symlinks when: "'client' in binaries" block: - - name: Download and unarchive the client from {{ client_url }} + - name: Unarchive the client from {{ client_tarball }} ansible.builtin.unarchive: - src: "{{ client_url }}" + src: "{{ home_dir }}/{{ release_name }}/{{ client_tarball }}" dest: "{{ home_dir }}/{{ release_name }}" remote_src: yes - register: result - until: result is not failed - retries: 3 - delay: 10 - name: Create a symlink to the oc binary from /usr/local/bin ansible.builtin.file: @@ -73,3 +86,9 @@ dest: /usr/bin/kubectl state: link become: true + + always: + - name: Remove pull secret file + ansible.builtin.file: + path: "{{ pull_secret_file }}" + state: absent diff --git a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_build_name.yml b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_build_name.yml index 78c07a79..7461eb58 100644 --- a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_build_name.yml +++ b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_build_name.yml @@ -39,12 +39,25 @@ ansible.builtin.set_fact: openshift_release_build_name: "{{ latest_build_info.name }}" -- name: Set openshift_release_build_name when a specific build is given - ansible.builtin.set_fact: - openshift_release_build_name: "{{ build_name }}" + - name: Set openshift_release_pull_spec from release stream API response + ansible.builtin.set_fact: + openshift_release_pull_spec: "{{ latest_build_info.pullSpec }}" + +- name: Set build name and pull spec when a specific build is given when: - release is not match("4-stable") - build_name not in ['','candidate','fast','stable','eus'] + block: + - name: Set openshift_release_build_name for specific build + ansible.builtin.set_fact: + openshift_release_build_name: "{{ build_name }}" + + - name: Construct openshift_release_pull_spec for specific build + ansible.builtin.set_fact: + openshift_release_pull_spec: >- + {{ 'registry.ci.openshift.org/ocp/release:' + build_name + if build_name is search('nightly') + else 'quay.io/openshift-release-dev/ocp-release:' + build_name + '-x86_64' }} - name: Discover the release build name for the z-stream promoted to upgrade channel on {{ release }} # Ref: https://docs.openshift.com/container-platform/4.9/updating/understanding-upgrade-channels-release.html @@ -68,3 +81,12 @@ - name: Set openshift_release_build_name when openshift.build is set to a channel ansible.builtin.set_fact: openshift_release_build_name: "{{ result.stdout }}" + + - name: Parse openshift_release_pull_spec from Pull From field in release.txt + ansible.builtin.shell: set -o pipefail && grep '^Pull From:' {{ home_dir }}/release.txt | awk '{print $3}' + changed_when: false + register: pull_from_result + + - name: Set openshift_release_pull_spec from channel release.txt + ansible.builtin.set_fact: + openshift_release_pull_spec: "{{ pull_from_result.stdout }}" From 7756aeddc494dd8119933014f52187c40eff4e4b Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Mon, 22 Jun 2026 19:22:53 +0530 Subject: [PATCH 09/12] Add pull secret extraction fallback and diagnostics for CI debugging Co-authored-by: Cursor --- .../tasks/get_openshift_release_binaries.yml | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml index ce4dc41f..6c97c85f 100644 --- a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml +++ b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml @@ -19,15 +19,56 @@ msg: "'openshift_release_pull_spec' must be set by get_openshift_release_build_name.yml" when: openshift_release_pull_spec is not defined or openshift_release_pull_spec == '' - - name: Extract pull secret from host cluster + - name: Check if rhoso_kubeconfig file exists + ansible.builtin.stat: + path: "{{ rhoso_kubeconfig }}" + register: _rhoso_kubeconfig_stat + + - name: Debug kubeconfig and environment info + ansible.builtin.debug: + msg: | + rhoso_kubeconfig path: {{ rhoso_kubeconfig }} + rhoso_kubeconfig exists: {{ _rhoso_kubeconfig_stat.stat.exists }} + home_dir: {{ home_dir }} + KUBECONFIG env: {{ lookup('ansible.builtin.env', 'KUBECONFIG', default='(not set)') }} + + - name: Extract pull secret using rhoso_kubeconfig ansible.builtin.shell: >- set -o pipefail && oc get secret pull-secret -n openshift-config --kubeconfig={{ rhoso_kubeconfig }} -o jsonpath='{.data.\.dockerconfigjson}' | base64 -d > {{ pull_secret_file }} - changed_when: true + register: _pull_secret_rhoso + ignore_errors: true + no_log: true + when: _rhoso_kubeconfig_stat.stat.exists + + - name: Extract pull secret using default kubeconfig (fallback) + ansible.builtin.shell: >- + set -o pipefail && + oc get secret pull-secret -n openshift-config + -o jsonpath='{.data.\.dockerconfigjson}' + | base64 -d > {{ pull_secret_file }} + register: _pull_secret_default + ignore_errors: true no_log: true + when: _rhoso_kubeconfig_stat.stat.exists == false or _pull_secret_rhoso is failed + + - name: Verify pull secret file was created and is valid JSON + ansible.builtin.shell: python3 -c "import json; d=json.load(open('{{ pull_secret_file }}')); print(len(d.get('auths',{})), 'registries found')" + register: _pull_secret_verify + ignore_errors: true + + - name: Fail with diagnostic info if pull secret extraction failed + ansible.builtin.fail: + msg: | + Failed to extract pull secret from host cluster. + rhoso_kubeconfig exists: {{ _rhoso_kubeconfig_stat.stat.exists }} + rhoso_kubeconfig result: {{ 'skipped' if _pull_secret_rhoso is skipped else ('ok' if _pull_secret_rhoso is success else 'FAILED rc=' + (_pull_secret_rhoso.rc | default('?') | string)) }} + default kubeconfig result: {{ 'skipped' if _pull_secret_default is skipped else ('ok' if _pull_secret_default is success else 'FAILED rc=' + (_pull_secret_default.rc | default('?') | string)) }} + pull secret validation: {{ _pull_secret_verify.stdout | default('FAILED - ' + _pull_secret_verify.stderr | default('unknown error')) }} + when: _pull_secret_verify is failed - name: Create the installer directory ansible.builtin.file: From 35985b8d870176703e9ef995062874a96aeec5bc Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Tue, 23 Jun 2026 10:43:21 +0530 Subject: [PATCH 10/12] Use curl + kubeconfig certs for pull secret and bootstrap oc from mirror to eliminate file-cache dependency --- .../tasks/get_openshift_release_binaries.yml | 126 ++++++++++++------ 1 file changed, 82 insertions(+), 44 deletions(-) diff --git a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml index 6c97c85f..fa407636 100644 --- a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml +++ b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml @@ -3,11 +3,18 @@ # using `oc adm release extract --tools` instead of the release-controller's # file-cache (openshift-release-artifacts), which has no SLA and can get stuck # indefinitely during tool extraction. +# +# Since the shiftstackclient pod starts with no `oc` binary, we bootstrap one +# from mirror.openshift.com, extract the pull secret from the host cluster via +# the Kubernetes API (using the kubeconfig's client certificate), then use +# `oc adm release extract --tools` to get the version-matched binaries. - name: Get the OCP installer and/or client binaries vars: installer_tarball: "openshift-install-linux-{{ release_name }}.tar.gz" client_tarball: "openshift-client-linux-{{ release_name }}.tar.gz" pull_secret_file: "{{ home_dir }}/pull-secret.json" + bootstrap_oc_dir: "{{ home_dir }}/bootstrap-oc" + bootstrap_oc_url: "{{ openshift_mirror_url }}/stable/openshift-client-linux.tar.gz" block: - name: Fail if release_name var is not defined ansible.builtin.fail: @@ -19,56 +26,82 @@ msg: "'openshift_release_pull_spec' must be set by get_openshift_release_build_name.yml" when: openshift_release_pull_spec is not defined or openshift_release_pull_spec == '' - - name: Check if rhoso_kubeconfig file exists - ansible.builtin.stat: - path: "{{ rhoso_kubeconfig }}" - register: _rhoso_kubeconfig_stat + - name: Extract pull secret from host cluster via Kubernetes API + ansible.builtin.script: + cmd: python3 - + stdin: | + import yaml, json, base64, subprocess, os, sys - - name: Debug kubeconfig and environment info - ansible.builtin.debug: - msg: | - rhoso_kubeconfig path: {{ rhoso_kubeconfig }} - rhoso_kubeconfig exists: {{ _rhoso_kubeconfig_stat.stat.exists }} - home_dir: {{ home_dir }} - KUBECONFIG env: {{ lookup('ansible.builtin.env', 'KUBECONFIG', default='(not set)') }} + kubeconfig_path = "{{ rhoso_kubeconfig }}" + output_path = "{{ pull_secret_file }}" - - name: Extract pull secret using rhoso_kubeconfig - ansible.builtin.shell: >- - set -o pipefail && - oc get secret pull-secret -n openshift-config - --kubeconfig={{ rhoso_kubeconfig }} - -o jsonpath='{.data.\.dockerconfigjson}' - | base64 -d > {{ pull_secret_file }} - register: _pull_secret_rhoso - ignore_errors: true - no_log: true - when: _rhoso_kubeconfig_stat.stat.exists + with open(kubeconfig_path) as f: + kc = yaml.safe_load(f) - - name: Extract pull secret using default kubeconfig (fallback) - ansible.builtin.shell: >- - set -o pipefail && - oc get secret pull-secret -n openshift-config - -o jsonpath='{.data.\.dockerconfigjson}' - | base64 -d > {{ pull_secret_file }} - register: _pull_secret_default - ignore_errors: true + server = kc['clusters'][0]['cluster']['server'] + user = kc['users'][0]['user'] + + ca_path = '/tmp/k8s-ca.crt' + cert_path = '/tmp/k8s-client.crt' + key_path = '/tmp/k8s-client.key' + + with open(ca_path, 'wb') as f: + f.write(base64.b64decode(kc['clusters'][0]['cluster']['certificate-authority-data'])) + with open(cert_path, 'wb') as f: + f.write(base64.b64decode(user['client-certificate-data'])) + with open(key_path, 'wb') as f: + f.write(base64.b64decode(user['client-key-data'])) + + result = subprocess.run([ + 'curl', '-s', '--fail', + '--cacert', ca_path, + '--cert', cert_path, + '--key', key_path, + f'{server}/api/v1/namespaces/openshift-config/secrets/pull-secret' + ], capture_output=True, text=True) + + for f in [ca_path, cert_path, key_path]: + os.remove(f) + + if result.returncode != 0: + print(f"Failed to fetch pull secret from {server}: {result.stderr}", file=sys.stderr) + sys.exit(1) + + data = json.loads(result.stdout) + decoded = base64.b64decode(data['data']['.dockerconfigjson']).decode() + auths = json.loads(decoded) + + with open(output_path, 'w') as f: + f.write(decoded) + + print(f"Pull secret extracted: {len(auths.get('auths', {}))} registries") + register: _pull_secret_result no_log: true - when: _rhoso_kubeconfig_stat.stat.exists == false or _pull_secret_rhoso is failed - - name: Verify pull secret file was created and is valid JSON - ansible.builtin.shell: python3 -c "import json; d=json.load(open('{{ pull_secret_file }}')); print(len(d.get('auths',{})), 'registries found')" + - name: Verify pull secret file is valid + ansible.builtin.shell: >- + python3 -c "import json; d=json.load(open('{{ pull_secret_file }}')); + print(len(d.get('auths',{})), 'registries found')" register: _pull_secret_verify - ignore_errors: true + changed_when: false - - name: Fail with diagnostic info if pull secret extraction failed - ansible.builtin.fail: - msg: | - Failed to extract pull secret from host cluster. - rhoso_kubeconfig exists: {{ _rhoso_kubeconfig_stat.stat.exists }} - rhoso_kubeconfig result: {{ 'skipped' if _pull_secret_rhoso is skipped else ('ok' if _pull_secret_rhoso is success else 'FAILED rc=' + (_pull_secret_rhoso.rc | default('?') | string)) }} - default kubeconfig result: {{ 'skipped' if _pull_secret_default is skipped else ('ok' if _pull_secret_default is success else 'FAILED rc=' + (_pull_secret_default.rc | default('?') | string)) }} - pull secret validation: {{ _pull_secret_verify.stdout | default('FAILED - ' + _pull_secret_verify.stderr | default('unknown error')) }} - when: _pull_secret_verify is failed + - name: Bootstrap oc client from {{ bootstrap_oc_url }} + block: + - name: Create bootstrap directory + ansible.builtin.file: + path: "{{ bootstrap_oc_dir }}" + state: directory + mode: u=rwx,g=rw,o=r + + - name: Download stable oc client from mirror + ansible.builtin.unarchive: + src: "{{ bootstrap_oc_url }}" + dest: "{{ bootstrap_oc_dir }}" + remote_src: yes + register: _bootstrap_download + until: _bootstrap_download is not failed + retries: 3 + delay: 10 - name: Create the installer directory ansible.builtin.file: @@ -79,7 +112,7 @@ - name: Extract OCP tools from release image {{ openshift_release_pull_spec }} ansible.builtin.command: cmd: >- - oc adm release extract + {{ bootstrap_oc_dir }}/oc adm release extract --tools --registry-config={{ pull_secret_file }} --to={{ home_dir }}/{{ release_name }} @@ -133,3 +166,8 @@ ansible.builtin.file: path: "{{ pull_secret_file }}" state: absent + + - name: Remove bootstrap oc directory + ansible.builtin.file: + path: "{{ bootstrap_oc_dir }}" + state: absent From 6c996c32fbd5bdbf640413d3e204d7ea6f0b0c38 Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Tue, 23 Jun 2026 14:14:15 +0530 Subject: [PATCH 11/12] Fix pull secret extraction to use shell heredoc instead of script module Co-authored-by: Cursor --- .../tasks/get_openshift_release_binaries.yml | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml index fa407636..baf423da 100644 --- a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml +++ b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml @@ -27,54 +27,54 @@ when: openshift_release_pull_spec is not defined or openshift_release_pull_spec == '' - name: Extract pull secret from host cluster via Kubernetes API - ansible.builtin.script: - cmd: python3 - - stdin: | - import yaml, json, base64, subprocess, os, sys - - kubeconfig_path = "{{ rhoso_kubeconfig }}" - output_path = "{{ pull_secret_file }}" - - with open(kubeconfig_path) as f: - kc = yaml.safe_load(f) - - server = kc['clusters'][0]['cluster']['server'] - user = kc['users'][0]['user'] - - ca_path = '/tmp/k8s-ca.crt' - cert_path = '/tmp/k8s-client.crt' - key_path = '/tmp/k8s-client.key' - - with open(ca_path, 'wb') as f: - f.write(base64.b64decode(kc['clusters'][0]['cluster']['certificate-authority-data'])) - with open(cert_path, 'wb') as f: - f.write(base64.b64decode(user['client-certificate-data'])) - with open(key_path, 'wb') as f: - f.write(base64.b64decode(user['client-key-data'])) - - result = subprocess.run([ - 'curl', '-s', '--fail', - '--cacert', ca_path, - '--cert', cert_path, - '--key', key_path, - f'{server}/api/v1/namespaces/openshift-config/secrets/pull-secret' - ], capture_output=True, text=True) - - for f in [ca_path, cert_path, key_path]: - os.remove(f) - - if result.returncode != 0: - print(f"Failed to fetch pull secret from {server}: {result.stderr}", file=sys.stderr) - sys.exit(1) - - data = json.loads(result.stdout) - decoded = base64.b64decode(data['data']['.dockerconfigjson']).decode() - auths = json.loads(decoded) - - with open(output_path, 'w') as f: - f.write(decoded) - - print(f"Pull secret extracted: {len(auths.get('auths', {}))} registries") + ansible.builtin.shell: | + python3 << 'PYEOF' + import yaml, json, base64, subprocess, os, sys + + kubeconfig_path = "{{ rhoso_kubeconfig }}" + output_path = "{{ pull_secret_file }}" + + with open(kubeconfig_path) as f: + kc = yaml.safe_load(f) + + server = kc['clusters'][0]['cluster']['server'] + user = kc['users'][0]['user'] + + ca_path = '/tmp/k8s-ca.crt' + cert_path = '/tmp/k8s-client.crt' + key_path = '/tmp/k8s-client.key' + + with open(ca_path, 'wb') as f: + f.write(base64.b64decode(kc['clusters'][0]['cluster']['certificate-authority-data'])) + with open(cert_path, 'wb') as f: + f.write(base64.b64decode(user['client-certificate-data'])) + with open(key_path, 'wb') as f: + f.write(base64.b64decode(user['client-key-data'])) + + result = subprocess.run([ + 'curl', '-s', '--fail', + '--cacert', ca_path, + '--cert', cert_path, + '--key', key_path, + f'{server}/api/v1/namespaces/openshift-config/secrets/pull-secret' + ], capture_output=True, text=True) + + for f in [ca_path, cert_path, key_path]: + os.remove(f) + + if result.returncode != 0: + print(f"Failed to fetch pull secret from {server}: {result.stderr}", file=sys.stderr) + sys.exit(1) + + data = json.loads(result.stdout) + decoded = base64.b64decode(data['data']['.dockerconfigjson']).decode() + auths = json.loads(decoded) + + with open(output_path, 'w') as f: + f.write(decoded) + + print(f"Pull secret extracted: {len(auths.get('auths', {}))} registries") + PYEOF register: _pull_secret_result no_log: true From 8548becebe151bc32b4746e4f77650996c2b122f Mon Sep 17 00:00:00 2001 From: tusharjadhav3302 Date: Thu, 25 Jun 2026 14:32:53 +0530 Subject: [PATCH 12/12] Use tempfile for certs, add auth type check, skip bootstrap if oc exists, add 900s timeout per extract attempt Co-authored-by: Cursor --- .../tasks/get_openshift_release_binaries.yml | 70 ++++++++++++------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml index baf423da..a53995ae 100644 --- a/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml +++ b/collection/tools/roles/tools_get_openshift_release/tasks/get_openshift_release_binaries.yml @@ -4,10 +4,10 @@ # file-cache (openshift-release-artifacts), which has no SLA and can get stuck # indefinitely during tool extraction. # -# Since the shiftstackclient pod starts with no `oc` binary, we bootstrap one -# from mirror.openshift.com, extract the pull secret from the host cluster via -# the Kubernetes API (using the kubeconfig's client certificate), then use -# `oc adm release extract --tools` to get the version-matched binaries. +# The pull secret is extracted from the host cluster via the Kubernetes API +# using the kubeconfig's client certificate. If `oc` is not already present +# in the pod (cold-start), a stable client is bootstrapped from +# mirror.openshift.com before running `oc adm release extract --tools`. - name: Get the OCP installer and/or client binaries vars: installer_tarball: "openshift-install-linux-{{ release_name }}.tar.gz" @@ -29,7 +29,7 @@ - name: Extract pull secret from host cluster via Kubernetes API ansible.builtin.shell: | python3 << 'PYEOF' - import yaml, json, base64, subprocess, os, sys + import yaml, json, base64, subprocess, os, sys, tempfile kubeconfig_path = "{{ rhoso_kubeconfig }}" output_path = "{{ pull_secret_file }}" @@ -40,27 +40,33 @@ server = kc['clusters'][0]['cluster']['server'] user = kc['users'][0]['user'] - ca_path = '/tmp/k8s-ca.crt' - cert_path = '/tmp/k8s-client.crt' - key_path = '/tmp/k8s-client.key' - - with open(ca_path, 'wb') as f: - f.write(base64.b64decode(kc['clusters'][0]['cluster']['certificate-authority-data'])) - with open(cert_path, 'wb') as f: - f.write(base64.b64decode(user['client-certificate-data'])) - with open(key_path, 'wb') as f: - f.write(base64.b64decode(user['client-key-data'])) - - result = subprocess.run([ - 'curl', '-s', '--fail', - '--cacert', ca_path, - '--cert', cert_path, - '--key', key_path, - f'{server}/api/v1/namespaces/openshift-config/secrets/pull-secret' - ], capture_output=True, text=True) + try: + cert_data = user['client-certificate-data'] + key_data = user['client-key-data'] + except KeyError: + print(f"rhoso_kubeconfig must use client-certificate auth, " + f"found auth keys: {list(user.keys())}", file=sys.stderr) + sys.exit(1) - for f in [ca_path, cert_path, key_path]: - os.remove(f) + with tempfile.TemporaryDirectory() as tmpdir: + ca_path = os.path.join(tmpdir, 'ca.crt') + cert_path = os.path.join(tmpdir, 'client.crt') + key_path = os.path.join(tmpdir, 'client.key') + + with open(ca_path, 'wb') as f: + f.write(base64.b64decode(kc['clusters'][0]['cluster']['certificate-authority-data'])) + with open(cert_path, 'wb') as f: + f.write(base64.b64decode(cert_data)) + with open(key_path, 'wb') as f: + f.write(base64.b64decode(key_data)) + + result = subprocess.run([ + 'curl', '-s', '--fail', + '--cacert', ca_path, + '--cert', cert_path, + '--key', key_path, + f'{server}/api/v1/namespaces/openshift-config/secrets/pull-secret' + ], capture_output=True, text=True) if result.returncode != 0: print(f"Failed to fetch pull secret from {server}: {result.stderr}", file=sys.stderr) @@ -85,7 +91,14 @@ register: _pull_secret_verify changed_when: false + - name: Check if oc is already available + ansible.builtin.command: which oc + register: _oc_available + ignore_errors: true + changed_when: false + - name: Bootstrap oc client from {{ bootstrap_oc_url }} + when: _oc_available is failed block: - name: Create bootstrap directory ansible.builtin.file: @@ -103,6 +116,10 @@ retries: 3 delay: 10 + - name: Set oc binary path + ansible.builtin.set_fact: + _oc_bin: "{{ (bootstrap_oc_dir + '/oc') if _oc_available is failed else 'oc' }}" + - name: Create the installer directory ansible.builtin.file: path: "{{ home_dir }}/{{ release_name }}" @@ -112,7 +129,8 @@ - name: Extract OCP tools from release image {{ openshift_release_pull_spec }} ansible.builtin.command: cmd: >- - {{ bootstrap_oc_dir }}/oc adm release extract + timeout 900 + {{ _oc_bin }} adm release extract --tools --registry-config={{ pull_secret_file }} --to={{ home_dir }}/{{ release_name }}