From fecdb98fc3507932c3216614fb41340a5f7f9828 Mon Sep 17 00:00:00 2001 From: Julian Arce <52429267+JuArce@users.noreply.github.com> Date: Tue, 23 Jun 2026 18:19:50 -0300 Subject: [PATCH 1/3] infra: start gpu server every 24hs --- .github/workflows/aggregation_mode.yml | 47 ++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/aggregation_mode.yml diff --git a/.github/workflows/aggregation_mode.yml b/.github/workflows/aggregation_mode.yml new file mode 100644 index 000000000..0ffe6f40a --- /dev/null +++ b/.github/workflows/aggregation_mode.yml @@ -0,0 +1,47 @@ +name: "Start Aggregation Mode Server" + +# Starts the Paperspace GPU server that runs the aggregation mode. +# +# The server is kept powered off to avoid 24/7 billing. This workflow boots it +# once a day; on boot the machine runs `aggregation_mode.service`, which executes +# the SP1 aggregations and then powers the machine off again +# (see infra/aggregation_mode/run.sh). +on: + schedule: + # 15:00 UTC == 12:00 GMT-3, every day. GitHub Actions cron is always in UTC. + - cron: "0 15 * * *" + workflow_dispatch: + +jobs: + start-server: + name: Start Paperspace aggregation server + runs-on: ubuntu-latest + steps: + - name: Start Paperspace machine + env: + PAPERSPACE_API_KEY: ${{ secrets.PAPERSPACE_API_KEY }} + MACHINE_ID: ${{ secrets.PAPERSPACE_MACHINE_ID }} + run: | + set -euo pipefail + + if [ -z "${PAPERSPACE_API_KEY}" ] || [ -z "${MACHINE_ID}" ]; then + echo "::error::PAPERSPACE_API_KEY and PAPERSPACE_MACHINE_ID secrets must be set." + exit 1 + fi + + echo "Starting Paperspace machine ${MACHINE_ID}..." + http_code=$(curl -sS -o response.json -w "%{http_code}" \ + -X PATCH "https://api.paperspace.com/v1/machines/${MACHINE_ID}/start" \ + -H "Authorization: Bearer ${PAPERSPACE_API_KEY}") + + echo "Paperspace API responded with HTTP ${http_code}" + cat response.json || true + + # 2xx means the start request was accepted. + if [ "${http_code}" -lt 200 ] || [ "${http_code}" -ge 300 ]; then + echo "::error::Failed to start Paperspace machine (HTTP ${http_code})." + exit 1 + fi + + echo "Start request accepted. The machine will run the aggregation on boot and shut itself down afterwards." + \ No newline at end of file From c103d7ed9368673da771f4ba44111f79c279f29d Mon Sep 17 00:00:00 2001 From: Julian Arce <52429267+JuArce@users.noreply.github.com> Date: Tue, 23 Jun 2026 18:21:58 -0300 Subject: [PATCH 2/3] fix eol --- .github/workflows/aggregation_mode.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/aggregation_mode.yml b/.github/workflows/aggregation_mode.yml index 0ffe6f40a..b1b948218 100644 --- a/.github/workflows/aggregation_mode.yml +++ b/.github/workflows/aggregation_mode.yml @@ -44,4 +44,3 @@ jobs: fi echo "Start request accepted. The machine will run the aggregation on boot and shut itself down afterwards." - \ No newline at end of file From 0b92a037d859b48ce55b10ccd98e42175d4c7ecf Mon Sep 17 00:00:00 2001 From: Julian Arce <52429267+JuArce@users.noreply.github.com> Date: Tue, 23 Jun 2026 18:36:23 -0300 Subject: [PATCH 3/3] apply code review --- .github/workflows/aggregation_mode.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/aggregation_mode.yml b/.github/workflows/aggregation_mode.yml index b1b948218..b1416125a 100644 --- a/.github/workflows/aggregation_mode.yml +++ b/.github/workflows/aggregation_mode.yml @@ -16,6 +16,8 @@ jobs: start-server: name: Start Paperspace aggregation server runs-on: ubuntu-latest + timeout-minutes: 5 + permissions: {} steps: - name: Start Paperspace machine env: @@ -30,16 +32,19 @@ jobs: fi echo "Starting Paperspace machine ${MACHINE_ID}..." - http_code=$(curl -sS -o response.json -w "%{http_code}" \ + http_code=$(curl -sS --max-time 30 -o response.json -w "%{http_code}" \ -X PATCH "https://api.paperspace.com/v1/machines/${MACHINE_ID}/start" \ -H "Authorization: Bearer ${PAPERSPACE_API_KEY}") echo "Paperspace API responded with HTTP ${http_code}" - cat response.json || true # 2xx means the start request was accepted. if [ "${http_code}" -lt 200 ] || [ "${http_code}" -ge 300 ]; then + # Only dump the response body on failure: a successful start response + # contains the full machine object (public IP, network details, etc.), + # which GitHub Actions would not mask in the log. echo "::error::Failed to start Paperspace machine (HTTP ${http_code})." + cat response.json || true exit 1 fi