diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10a3922..1694fa6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,10 +99,15 @@ jobs: # validates the output with vexctl when it's on PATH. vexctl is # a Go binary distributed via `go install`. Setting up Go here # is the cheapest way to give every test job a usable vexctl. + # Go must be >= 1.24: its linker only began emitting an LC_UUID load + # command then, and the macOS-latest runner's dyld (Sequoia+) refuses + # to load a Mach-O binary without one ("missing LC_UUID load command"), + # so a 1.22-built vexctl crashes on launch and every e2e_vex assertion + # fails. ubuntu/windows are unaffected, but the matrix shares this pin. # SHA pin resolved from `gh api repos/actions/setup-go/git/refs/tags/v6.4.0`. uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: - go-version: '1.22' + go-version: '1.24' cache: false - name: Install vexctl diff --git a/README.md b/README.md index 19fec6a..3c94488 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,9 @@ Each flag has a matching `SOCKET_*` environment variable. **Precedence is CLI ar | `--proxy-url ` | `SOCKET_PROXY_URL` | Public proxy URL used when no API token is set. | | `-e, --ecosystems ` | `SOCKET_ECOSYSTEMS` | Restrict to specific ecosystems (comma-separated, e.g. `npm,pypi`). | | `--download-mode ` | `SOCKET_DOWNLOAD_MODE` | Artifact to fetch when local files are missing: `diff` (default, smallest delta), `package` (full per-package tarball), or `file` (legacy per-file blobs). | +| `--vendor-source ` | `SOCKET_VENDOR_SOURCE` | How `vendor` acquires the installable artifact: `auto` (default — download the prebuilt package from patch.socket.dev, fall back to a local build on any miss), `service` (require the service, fail-closed), or `build` (always build locally). Covers npm, pypi, cargo, golang, composer, and gem. 
| +| `--vendor-url ` | `SOCKET_VENDOR_URL` | Base host for the vendoring service's package-reference request (default: the active `--api-url`/`--proxy-url` base). Point at staging / local dev for testing. | +| `--patch-server-url ` | `SOCKET_PATCH_SERVER_URL` | Override the host of the prebuilt-archive download URL the service returns (default: as returned). Mainly for local-dev / testing. | | `--offline` | `SOCKET_OFFLINE` | Strict airgap: never contact the network. Operations that need remote data fail loudly. | | `-g, --global` | `SOCKET_GLOBAL` | Operate on globally-installed packages. | | `--global-prefix ` | `SOCKET_GLOBAL_PREFIX` | Override the path used to discover globally-installed packages. | diff --git a/crates/socket-patch-cli/CLI_CONTRACT.md b/crates/socket-patch-cli/CLI_CONTRACT.md index 9b02e11..e0d7495 100644 --- a/crates/socket-patch-cli/CLI_CONTRACT.md +++ b/crates/socket-patch-cli/CLI_CONTRACT.md @@ -35,6 +35,9 @@ In v3.0 every subcommand accepts the same set of "global" flags via a single sha | `--proxy-url` | — | `SOCKET_PROXY_URL` | `https://patches-api.socket.dev` | string | Public proxy when no token | | `--ecosystems` | `-e` | `SOCKET_ECOSYSTEMS` | (all) | CSV → `Vec` | Restrict to these ecosystems | | `--download-mode` | — | `SOCKET_DOWNLOAD_MODE` | **`diff`** | enum: `diff` \| `package` \| `file` | Patch artifact format | +| `--vendor-source` | — | `SOCKET_VENDOR_SOURCE` | **`auto`** | enum: `auto` \| `service` \| `build` | How `vendor` acquires the installable artifact (see "Prebuilt vendor artifacts") | +| `--vendor-url` | — | `SOCKET_VENDOR_URL` | (active API/proxy base) | string | Base host for the vendoring-service package-reference request | +| `--patch-server-url` | — | `SOCKET_PATCH_SERVER_URL` | (server-returned) | string | Override the host of the prebuilt-archive download URL (local-dev / testing) | | `--offline` | — | `SOCKET_OFFLINE` | `false` | bool | **Strict airgap on every command** — never contact the network | | 
`--global` | `-g` | `SOCKET_GLOBAL` | `false` | bool | Operate on globally-installed packages | | `--global-prefix` | — | `SOCKET_GLOBAL_PREFIX` | (auto) | path | Override global packages root | @@ -326,6 +329,46 @@ machines with **no socket-patch installed and no Socket API access** (registry a unvendored dependencies may still be needed). Every mechanism below was validated against the real package managers (`spikes/PHASE0-FINDINGS.txt`). +**Prebuilt vendor artifacts (`--vendor-source`)**: by default (`auto`) `vendor` first tries to +DOWNLOAD the already-built patched artifact + integrity from the patch.socket.dev vendoring service, +and automatically falls back to building it locally on any non-fatal miss. `service` requires the service +(fail-closed); `build` always builds locally (the pre-service behavior). The download is a two-step +flow on the configured API/proxy host (`--vendor-url` overrides it): a package-reference POST +(`/v0/orgs/{slug}/patches/package` authenticated, else the public proxy's `/patch/package`) yields a +grant-tokenized serve URL + integrity, then a GET fetches the archive (`--patch-server-url` rewrites +that URL's host for local-dev / testing). The downloaded bytes are ALWAYS integrity-verified before +use (sha512 SRI for every ecosystem; golang additionally the `h1:` module dirhash) — mismatched bytes +are never used and never silently ignored: `auto` discards them and builds locally, reporting +`vendor_prebuilt_integrity_mismatch`, while `service` refuses (see the table below). A service-vended +package reports each patched file as `AlreadyPatched` (trust is the verified service integrity, not a +local re-apply). 
The fallback ladder +per service outcome: + +| Service outcome | `auto` | `service` | +|---|---|---| +| granted/reused, integrity ok | **use service** | **use service** | +| integrity mismatch | local build + `vendor_prebuilt_integrity_mismatch` | refuse (`vendor_prebuilt_required`) | +| still building (`pending_build` / serve 408) | local build + `vendor_prebuilt_pending` | refuse | +| not built / withdrawn / not found / no usable artifact | local build (quiet) | refuse | +| 401 / 403 grant / 5xx / network error | local build + `vendor_prebuilt_unavailable` | refuse | +| `--offline` | local build | refuse (`vendor_service_offline_conflict`) | + +Coverage today: **npm** (all lock flavors), **pypi** (wheel — sdist falls back / refuses), **cargo** +(download + extract the `.crate`), **golang** (download + extract the module zip, verify the `h1:` +dirhash, wire the `replace`), **composer** (download + extract the dist zip), and **gem** (download + +extract the `.gem`, plus a `gem-stub-gemspec` SECOND artifact). The Tier-B ecosystems +(cargo/golang/composer/gem) download the patched archive and extract it into the vendor directory — +the same source tree the local build commits — then run the existing path-dep wiring; their +build-equivalence is exercised by the toolchain-backed e2e suites (which skip when the package +manager is absent). **gem** needs the extra `gem-stub-gemspec` artifact because a path-sourced gem +needs an eval-able stub gemspec that the `.gem` archive doesn't carry in bundler's required form (a +`.gem` keeps the gemspec as YAML in `metadata.gz`); the converter generates that stub and serves it +alongside the `.gem`, and the gem backend downloads + integrity-verifies both. A served gem whose +stub is missing (a native-extension gem, for which the converter emits no stub, or a patch built +before the stub rollout) is treated as a service miss — `auto` falls back to the local build, +`service` refuses (`vendor_prebuilt_required`). 
For any ecosystem with no service path at all +`auto`/`build` build locally as before, and `service` refuses with +`vendor_service_unsupported_ecosystem`. A successful service vend emits `vendor_prebuilt_downloaded`. +Unrelated to `--download-mode` (which selects the patch-CONTENT format for the local build). + **Patch sources stay in memory (v3.4)**: vendoring never writes `.socket/blobs/`, `.socket/diffs/`, or temporary patch files. Pre-existing `.socket/` artifacts (from a prior `apply`/`get`/`repair`) are read in place; already-vendored purls re-stage patch content from the committed artifact itself @@ -523,6 +566,9 @@ All v3.0 env vars use the `SOCKET_*` prefix. Three legacy `SOCKET_PATCH_*` names | `SOCKET_PROXY_URL` | `--proxy-url` | `https://patches-api.socket.dev` | **Renamed in v3.0** (was `SOCKET_PATCH_PROXY_URL`). | | `SOCKET_ECOSYSTEMS` | `--ecosystems` / `-e` | (all) | Comma-separated list. | | `SOCKET_DOWNLOAD_MODE` | `--download-mode` | `diff` | One of `diff` / `package` / `file`. | +| `SOCKET_VENDOR_SOURCE` | `--vendor-source` | `auto` | One of `auto` / `service` / `build`. | +| `SOCKET_VENDOR_URL` | `--vendor-url` | (active API/proxy base) | Vendoring-service package-reference host. | +| `SOCKET_PATCH_SERVER_URL` | `--patch-server-url` | (server-returned) | Rewrites the prebuilt-archive download host. 
| | `SOCKET_OFFLINE` | `--offline` | `false` | — | | `SOCKET_GLOBAL` | `--global` / `-g` | `false` | — | | `SOCKET_GLOBAL_PREFIX` | `--global-prefix` | (auto) | — | diff --git a/crates/socket-patch-cli/src/args.rs b/crates/socket-patch-cli/src/args.rs index 1fde519..3e4562d 100644 --- a/crates/socket-patch-cli/src/args.rs +++ b/crates/socket-patch-cli/src/args.rs @@ -22,6 +22,7 @@ use socket_patch_core::constants::{ DEFAULT_PATCH_API_PROXY_URL, DEFAULT_PATCH_MANIFEST_PATH, DEFAULT_SOCKET_API_URL, }; use socket_patch_core::crawlers::Ecosystem; +use socket_patch_core::patch::vendor::VendorSource; /// clap value-parser for each `--ecosystems` / `SOCKET_ECOSYSTEMS` token. /// @@ -49,6 +50,16 @@ fn parse_supported_ecosystem(s: &str) -> Result { } } +/// clap value-parser for `--vendor-source` / `SOCKET_VENDOR_SOURCE`. +/// +/// Validates the token against [`VendorSource`] (`auto` | `service` | `build`, +/// case-insensitive) at parse time so a typo fails the command immediately +/// rather than at vendor time, and normalizes it to the canonical lowercase +/// tag. Mirrors [`parse_supported_ecosystem`]'s fail-loud-on-typo posture. +fn parse_vendor_source(s: &str) -> Result { + VendorSource::parse(s).map(|v| v.as_tag().to_string()) +} + /// clap value-parser for boolean flags backed by an env var. /// /// Identical to clap's stock `BoolishValueParser` (case-insensitive @@ -134,6 +145,35 @@ pub struct GlobalArgs { )] pub download_mode: String, + /// Where `vendor` acquires the installable patched artifact. `auto` + /// (default) downloads the prebuilt archive from the patch.socket.dev + /// vendoring service and silently falls back to a local build on any miss; + /// `service` requires the service and fails closed; `build` always builds + /// locally (the pre-service behavior). Only `vendor` uses this; other + /// subcommands accept it silently. 
+ #[arg( + long = "vendor-source", + env = "SOCKET_VENDOR_SOURCE", + default_value = "auto", + value_parser = parse_vendor_source, + )] + pub vendor_source: String, + + /// Base URL for the patch vendoring service's package-reference request + /// (the step-1 POST). Defaults to the active API base (`--api-url`) when + /// authenticated or the proxy base (`--proxy-url`) otherwise. Override to + /// point `vendor` at staging / local dev independently of `--api-url`. + #[arg(long = "vendor-url", env = "SOCKET_VENDOR_URL")] + pub vendor_url: Option, + + /// Override the host of the prebuilt-archive download URL the vendoring + /// service returns (the step-2 GET). When set, the CLI rewrites the + /// scheme + host (+ port) of the returned URL to this base, preserving the + /// path. Mainly for local-dev / testing, where the host the server bakes + /// into the URL is not the one to actually fetch from. + #[arg(long = "patch-server-url", env = "SOCKET_PATCH_SERVER_URL")] + pub patch_server_url: Option, + /// Strict airgap: never contact the network. Operations that need remote /// data fail loudly when this is set. 
#[arg( @@ -330,6 +370,9 @@ pub const GLOBAL_ARG_ENV_VARS: &[&str] = &[ "SOCKET_PROXY_URL", "SOCKET_ECOSYSTEMS", "SOCKET_DOWNLOAD_MODE", + "SOCKET_VENDOR_SOURCE", + "SOCKET_VENDOR_URL", + "SOCKET_PATCH_SERVER_URL", "SOCKET_OFFLINE", "SOCKET_GLOBAL", "SOCKET_GLOBAL_PREFIX", @@ -390,6 +433,9 @@ impl Default for GlobalArgs { proxy_url: String::new(), ecosystems: None, download_mode: "diff".to_string(), + vendor_source: "auto".to_string(), + vendor_url: None, + patch_server_url: None, offline: false, strict: false, global: false, @@ -524,6 +570,7 @@ mod tests { std::env::set_var("SOCKET_GLOBAL_PREFIX", ""); std::env::set_var("SOCKET_ECOSYSTEMS", ""); std::env::set_var("SOCKET_DOWNLOAD_MODE", ""); + std::env::set_var("SOCKET_VENDOR_SOURCE", ""); std::env::set_var("SOCKET_MANIFEST_PATH", "keep.json"); std::env::set_var("SOCKET_ORG_SLUG", " "); @@ -552,10 +599,95 @@ mod tests { assert!(cli.common.global_prefix.is_none()); assert!(cli.common.ecosystems.is_none()); assert_eq!(cli.common.download_mode, "diff"); + assert_eq!( + cli.common.vendor_source, "auto", + "empty SOCKET_VENDOR_SOURCE must fall back to the `auto` default" + ); assert_eq!(cli.common.manifest_path, "keep.json"); }); } + /// `--vendor-source` parses every known token, normalizes case, honors the + /// env var, and defaults to `auto`; an unknown token aborts the parse. + #[test] + #[serial_test::serial] + fn vendor_source_flag_parses_normalizes_and_defaults() { + with_clean_socket_env(|| { + // Default when unset. + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert_eq!(cli.common.vendor_source, "auto"); + + // CLI value, case-normalized to the canonical tag. + let cli = + TestCli::try_parse_from(["socket-patch", "--vendor-source", "SERVICE"]).unwrap(); + assert_eq!(cli.common.vendor_source, "service"); + + // Env var honored. 
+ std::env::set_var("SOCKET_VENDOR_SOURCE", "build"); + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert_eq!(cli.common.vendor_source, "build"); + std::env::remove_var("SOCKET_VENDOR_SOURCE"); + + // Garbage is rejected at parse time. + assert!( + TestCli::try_parse_from(["socket-patch", "--vendor-source", "download"]).is_err(), + "an unknown vendor source must fail the parse", + ); + }); + } + + /// The new URL knobs flow through to the parsed args from CLI and env. + #[test] + #[serial_test::serial] + fn vendor_url_and_patch_server_url_flow_from_cli_and_env() { + with_clean_socket_env(|| { + let cli = TestCli::try_parse_from([ + "socket-patch", + "--vendor-url", + "https://patch.socket-staging.dev", + "--patch-server-url", + "http://localhost:4026", + ]) + .unwrap(); + assert_eq!( + cli.common.vendor_url.as_deref(), + Some("https://patch.socket-staging.dev") + ); + assert_eq!( + cli.common.patch_server_url.as_deref(), + Some("http://localhost:4026") + ); + + std::env::set_var("SOCKET_VENDOR_URL", "https://from-env.example"); + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert_eq!( + cli.common.vendor_url.as_deref(), + Some("https://from-env.example") + ); + std::env::remove_var("SOCKET_VENDOR_URL"); + // Unset by default. + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert!(cli.common.vendor_url.is_none()); + assert!(cli.common.patch_server_url.is_none()); + }); + } + + /// Single-source-of-truth guard: the new env vars must be registered in + /// `GLOBAL_ARG_ENV_VARS` (drives the scrub + clean-env harness). 
+ #[test] + fn global_arg_env_vars_includes_vendor_knobs() { + for var in [ + "SOCKET_VENDOR_SOURCE", + "SOCKET_VENDOR_URL", + "SOCKET_PATCH_SERVER_URL", + ] { + assert!( + GLOBAL_ARG_ENV_VARS.contains(&var), + "{var} must be in GLOBAL_ARG_ENV_VARS", + ); + } + } + /// `parse_bool_flag` accepts the same vocabulary as clap's /// `BoolishValueParser`, case-insensitively and with surrounding whitespace /// trimmed. diff --git a/crates/socket-patch-cli/src/commands/repair_vendor.rs b/crates/socket-patch-cli/src/commands/repair_vendor.rs index ee9801c..8a36101 100644 --- a/crates/socket-patch-cli/src/commands/repair_vendor.rs +++ b/crates/socket-patch-cli/src/commands/repair_vendor.rs @@ -602,6 +602,8 @@ pub(crate) async fn repair_vendored_artifacts( &vendored_at, false, false, + // Repair rebuilds locally from the recorded patch — no service. + None, ) .await; match outcome { diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index 7d75263..a4b478d 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -1277,8 +1277,10 @@ fn boxed_vendor_records<'a>( detached: bool, env: &'a mut Envelope, ) -> std::pin::Pin + 'a>> { + // `scan --vendor` builds locally (no vendoring-service config); the + // `vendor` command is the service-download entry point. 
Box::pin(vendor_records( - common, records, sources, detached, false, env, + common, records, sources, detached, false, env, None, )) } diff --git a/crates/socket-patch-cli/src/commands/vendor.rs b/crates/socket-patch-cli/src/commands/vendor.rs index dae1815..24f9780 100644 --- a/crates/socket-patch-cli/src/commands/vendor.rs +++ b/crates/socket-patch-cli/src/commands/vendor.rs @@ -25,7 +25,7 @@ use socket_patch_core::patch::apply::{verify_file_patch, PatchSources}; use socket_patch_core::patch::copy_tree::remove_tree; use socket_patch_core::patch::vendor::{ self, ecosystem_dir_for_purl, load_state, save_state, RevertOutcome, VendorEntry, - VendorOutcome, VendorWarning, + VendorOutcome, VendorServiceConfig, VendorSource, VendorWarning, }; use socket_patch_core::utils::purl::{normalize_purl, strip_purl_qualifiers}; use socket_patch_core::utils::telemetry::{track_patch_vendor_failed, track_patch_vendored}; @@ -99,8 +99,35 @@ pub(crate) async fn dispatch_vendor_one( vendored_at: &str, dry_run: bool, force: bool, + // The patch.socket.dev vendoring-service config. `None` = build-only (the + // pre-service behavior); used by the `vendor` command, `None` from `scan + // --vendor` / repair. Per-ecosystem backends consume it as they gain a + // service path. + service: Option<&VendorServiceConfig>, ) -> Option { let eco = ecosystem_dir_for_purl(purl)?; + + // Prebuilt service downloads now cover every vendorable ecosystem: npm, + // pypi, cargo, golang, composer, and gem. Gem's `.gem` archive doesn't + // carry the eval-able stub gemspec a bundler path source wants, so the + // converter generates it and serves it as a `gem-stub-gemspec` second + // artifact alongside the `.gem` (the gem backend downloads + verifies both). + // Under fail-closed `service` mode, refuse any not-covered ecosystem with a + // clear message rather than silently building (which would violate the + // contract). Under `auto`/`build` they fall through to the local build. 
+ const SERVICE_ECOSYSTEMS: &[&str] = &["npm", "pypi", "cargo", "golang", "composer", "gem"]; + if let Some(cfg) = service { + if cfg.source.requires_service() && !SERVICE_ECOSYSTEMS.contains(&eco) { + return Some(VendorOutcome::Refused { + code: "vendor_service_unsupported_ecosystem", + detail: format!( + "--vendor-source=service is not supported for `{eco}` \ + (prebuilt downloads cover npm, pypi, cargo, golang, composer, and gem); \ + use --vendor-source=auto or --vendor-source=build" + ), + }); + } + } Some(match eco { "npm" => { // The flavor router probes the project's lockfile (package-lock / @@ -114,6 +141,7 @@ pub(crate) async fn dispatch_vendor_one( vendored_at, dry_run, force, + service, ) .await } @@ -127,6 +155,7 @@ pub(crate) async fn dispatch_vendor_one( vendored_at, dry_run, force, + service, ) .await } @@ -140,6 +169,7 @@ pub(crate) async fn dispatch_vendor_one( vendored_at, dry_run, force, + service, ) .await } @@ -154,6 +184,7 @@ pub(crate) async fn dispatch_vendor_one( vendored_at, dry_run, force, + service, ) .await } @@ -168,6 +199,7 @@ pub(crate) async fn dispatch_vendor_one( vendored_at, dry_run, force, + service, ) .await } @@ -182,6 +214,7 @@ pub(crate) async fn dispatch_vendor_one( vendored_at, dry_run, force, + service, ) .await } @@ -311,11 +344,24 @@ pub(crate) fn record_warning( pub async fn run(args: VendorArgs) -> i32 { apply_env_toggles(&args.common); - let (telemetry_client, _) = + let (telemetry_client, use_public_proxy) = get_api_client_with_overrides(args.common.api_client_overrides()).await; let api_token = telemetry_client.api_token().cloned(); let org_slug = telemetry_client.org_slug().cloned(); + // Vendoring-service config, built once from the run-level client + flags. + // `vendor_source` was validated by clap, so the parse cannot fail; fall + // back to the `auto` default defensively. The same client is reused for + // the package-reference request (no second auth round-trip). 
+ let vendor_service = VendorServiceConfig { + source: VendorSource::parse(&args.common.vendor_source).unwrap_or_default(), + client: Some(telemetry_client.clone()), + use_public_proxy, + vendor_url: args.common.vendor_url.clone(), + patch_server_url: args.common.patch_server_url.clone(), + offline: args.common.offline, + }; + let manifest_path = args.common.resolved_manifest_path(); let socket_dir = manifest_path .parent() @@ -363,7 +409,7 @@ pub async fn run(args: VendorArgs) -> i32 { let exit = if args.revert { run_revert(&args, &mut env).await } else { - run_vendor(&args, &manifest_path, &mut env).await + run_vendor(&args, &manifest_path, &mut env, &vendor_service).await }; // Embedded VEX: same contract as `apply --vex` — only on success, and a @@ -415,7 +461,12 @@ pub async fn run(args: VendorArgs) -> i32 { exit } -async fn run_vendor(args: &VendorArgs, manifest_path: &Path, env: &mut Envelope) -> i32 { +async fn run_vendor( + args: &VendorArgs, + manifest_path: &Path, + env: &mut Envelope, + service: &VendorServiceConfig, +) -> i32 { let common = &args.common; let manifest = match read_manifest(manifest_path).await { Ok(Some(m)) => m, @@ -455,7 +506,16 @@ async fn run_vendor(args: &VendorArgs, manifest_path: &Path, env: &mut Envelope) }; let sources = staged.as_patch_sources(); - has_errors |= vendor_records(common, &manifest.patches, &sources, false, args.force, env).await; + has_errors |= vendor_records( + common, + &manifest.patches, + &sources, + false, + args.force, + env, + Some(service), + ) + .await; if has_errors { env.mark_partial_failure(); @@ -615,6 +675,9 @@ pub(crate) async fn vendor_records( detached: bool, force: bool, env: &mut Envelope, + // Vendoring-service config (`None` = build-only). The `vendor` command + // passes `Some(_)`; `scan --vendor` passes `None` today. 
+ service: Option<&VendorServiceConfig>, ) -> bool { let mut has_errors = false; let manifest_purls: Vec = records.keys().cloned().collect(); @@ -889,6 +952,7 @@ pub(crate) async fn vendor_records( &vendored_at, common.dry_run, force, + service, ) .await; diff --git a/crates/socket-patch-cli/tests/cli_global_args.rs b/crates/socket-patch-cli/tests/cli_global_args.rs index 6faebd8..e06b95e 100644 --- a/crates/socket-patch-cli/tests/cli_global_args.rs +++ b/crates/socket-patch-cli/tests/cli_global_args.rs @@ -76,6 +76,17 @@ fn global_flag_cases() -> Vec<(&'static str, Option<&'static str>, fn(&GlobalArg ("--download-mode", Some("package"), |c| { assert_eq!(c.download_mode, "package") }), + ("--vendor-source", Some("service"), |c| { + assert_eq!(c.vendor_source, "service") + }), + ("--vendor-url", Some("https://vendor.example.com"), |c| { + assert_eq!(c.vendor_url.as_deref(), Some("https://vendor.example.com")) + }), + ( + "--patch-server-url", + Some("http://localhost:4026"), + |c| assert_eq!(c.patch_server_url.as_deref(), Some("http://localhost:4026")), + ), ("--offline", None, |c| assert!(c.offline)), ("--global", None, |c| assert!(c.global)), ("--global-prefix", Some("/opt/global"), |c| { @@ -203,13 +214,16 @@ fn global_flag_cases_cover_every_global_field() { debug: _, no_telemetry: _, strict: _, + vendor_source: _, + vendor_url: _, + patch_server_url: _, } = common; - // 20 fields ↔ 20 long-flag cases. Bump both this count and add a case when + // 23 fields ↔ 23 long-flag cases. Bump both this count and add a case when // the destructure above forces you to add a field. 
assert_eq!( global_flag_cases().len(), - 20, + 23, "every GlobalArgs field needs a long-flag case in global_flag_cases()", ); diff --git a/crates/socket-patch-core/src/api/client.rs b/crates/socket-patch-core/src/api/client.rs index 5a8df6f..0f0ffe1 100644 --- a/crates/socket-patch-core/src/api/client.rs +++ b/crates/socket-patch-core/src/api/client.rs @@ -632,10 +632,401 @@ impl ApiClient { text, ))) }
+
+    /// Resolve a published-patch UUID into a prebuilt vendored archive +
+    /// integrity from the patch.socket.dev vendoring service, then download it.
+    ///
+    /// Two HTTP round-trips:
+    /// 1. POST the package-reference endpoint (`/v0/orgs/{slug}/patches/package`
+    ///    when authenticated, else the public proxy's `/patch/package`) to mint
+    ///    / reuse a download grant and learn the artifact URL + integrity.
+    /// 2. GET the returned grant-tokenized serve URL for the archive bytes.
+    ///
+    /// `vendor_url` overrides the step-1 base host; `patch_server_url` rewrites
+    /// the step-2 download host (both for staging / local-dev / testing). The
+    /// returned [`FetchedVendorPackage`] carries the *unverified* bytes plus the
+    /// service-reported integrity — the caller verifies before use.
+    ///
+    /// # Returns
+    /// - `Ready` — step 1 reported `granted`/`reused`, a `tarball` artifact
+    ///   with a sha512 and a download URL was present, and the GET succeeded.
+    /// - `Pending` — step 1 reported `pending_build`, or the GET reported the
+    ///   archive as still building.
+    /// - `Unavailable` — `build_failed`/`withdrawn`/`not_found`, any status
+    ///   string not listed here, a missing tarball / sha512 / URL, or a GET
+    ///   that reported the archive as not found.
+    /// - `Failed` — an invalid UUID, a step-1 request error, a `forbidden`
+    ///   status, a `patch_server_url` rewrite error on the tarball URL, or any
+    ///   other download failure.
+    pub async fn fetch_vendor_package(
+        &self,
+        uuid: &str,
+        free_only: bool,
+        vendor_url: Option<&str>,
+        patch_server_url: Option<&str>,
+    ) -> VendorServiceOutcome {
+        // Reject a malformed UUID before any network traffic.
+        if !is_valid_uuid(uuid) {
+            return VendorServiceOutcome::Failed(ApiError::InvalidHash(format!(
+                "Invalid patch UUID: {uuid}"
+            )));
+        }
+
+        // ── Step 1: resolve the grant URL + integrity ──────────────────────
+        let result = match self.request_vendor_package(uuid, free_only, vendor_url).await {
+            Ok(r) => r,
+            Err(e) => return VendorServiceOutcome::Failed(e),
+        };
+        // Classify the build/grant status before attempting any download.
+        match result.status.as_str() {
+            "granted" | "reused" => {}
+            "pending_build" => return VendorServiceOutcome::Pending,
+            "build_failed" | "withdrawn" | "not_found" => {
+                return VendorServiceOutcome::Unavailable(result.status.clone())
+            }
+            "forbidden" => {
+                return VendorServiceOutcome::Failed(ApiError::Forbidden(
+                    "Forbidden: not entitled to this patch (paid tier or no org access).".into(),
+                ))
+            }
+            // An unrecognized status is treated as a miss, not as a failure.
+            other => {
+                return VendorServiceOutcome::Unavailable(format!("unknown status `{other}`"))
+            }
+        }
+
+        // Select the native tarball artifact and its sha512 (the universal
+        // integrity floor — every ecosystem's tarball carries it). The npm
+        // yarn-berry-zip artifact is intentionally ignored here (v1).
+        let Some(artifact) = result
+            .artifacts
+            .as_ref()
+            .and_then(|arts| arts.iter().find(|a| a.kind == "tarball"))
+        else {
+            return VendorServiceOutcome::Unavailable("no tarball artifact in response".into());
+        };
+        let Some(sha512_raw) = artifact.integrity.sha512.as_deref() else {
+            return VendorServiceOutcome::Unavailable(
+                "tarball artifact has no sha512 integrity".into(),
+            );
+        };
+        let integrity_sri = normalize_sha512_sri(sha512_raw);
+        // The artifact's own URL wins; fall back to the top-level `url`.
+        let Some(download_url) = artifact.url.as_deref().or(result.url.as_deref()) else {
+            return VendorServiceOutcome::Unavailable("granted result has no download url".into());
+        };
+        // A rewrite error on the tarball URL fails the whole fetch.
+        let download_url = match patch_server_url {
+            Some(base) => match rewrite_url_host(download_url, base) {
+                Ok(u) => u,
+                Err(e) => return VendorServiceOutcome::Failed(e),
+            },
+            None => download_url.to_string(),
+        };
+
+        // Surface the OTHER served artifacts (e.g. the gem path-source stub
+        // gemspec) — their host-rewritten URL + normalized sha512 — so a
+        // backend that needs one can download + verify it lazily. Each is
+        // skipped unless it carries both a url and a sha512.
+        let mut secondary_artifacts: Vec = Vec::new();
+        if let Some(arts) = result.artifacts.as_ref() {
+            for a in arts {
+                if a.kind == "tarball" {
+                    continue;
+                }
+                let (Some(url), Some(sha512)) =
+                    (a.url.as_deref(), a.integrity.sha512.as_deref())
+                else {
+                    continue;
+                };
+                let url = match patch_server_url {
+                    Some(base) => match rewrite_url_host(url, base) {
+                        Ok(u) => u,
+                        // Unlike the tarball URL above, a rewrite error here
+                        // only drops this one secondary artifact.
+                        Err(_) => continue,
+                    },
+                    None => url.to_string(),
+                };
+                secondary_artifacts.push(SecondaryArtifact {
+                    kind: a.kind.clone(),
+                    url,
+                    integrity_sri: normalize_sha512_sri(sha512),
+                });
+            }
+        }
+
+        // ── Step 2: download the prebuilt archive ──────────────────────────
+        match self.download_vendor_archive(&download_url).await {
+            ServeDownload::Ok(bytes) => VendorServiceOutcome::Ready(FetchedVendorPackage {
+                tarball: bytes,
+                integrity_sri,
+                sha1_hex: artifact.integrity.sha1.clone(),
+                dirhash_h1: artifact.integrity.dirhash_h1.clone(),
+                size_bytes: artifact.size_bytes,
+                content_type: artifact.content_type.clone(),
+                source_url: download_url,
+                secondary_artifacts,
+            }),
+            ServeDownload::NotFound => {
+                VendorServiceOutcome::Unavailable("serve returned 404/410".into())
+            }
+            ServeDownload::Pending => VendorServiceOutcome::Pending,
+            ServeDownload::Failed(e) => VendorServiceOutcome::Failed(e),
+        }
+    }
+
+    /// Step 1 of [`Self::fetch_vendor_package`]: POST the package-reference
+    /// endpoint and return the single requested UUID's result.
+    ///
+    /// # Errors
+    /// - `ApiError::Network` when the request cannot be sent.
+    /// - `ApiError::Parse` when a 200 response body does not deserialize.
+    /// - `ApiError::Other` when a 200 response carries no entry for `uuid`, or
+    ///   for a non-200 status that `classify_auth_error` does not map.
+    /// - Whatever `classify_auth_error` returns for the status otherwise.
+    async fn request_vendor_package(
+        &self,
+        uuid: &str,
+        free_only: bool,
+        vendor_url: Option<&str>,
+    ) -> Result {
+        let body = PackageVendorRequest {
+            uuids: vec![uuid.to_string()],
+            // Only send freeOnly when forcing it (the public-proxy contract);
+            // the authenticated endpoint defaults to false.
+            free_only: free_only.then_some(true),
+        };
+        // Authenticated when a token + org slug are configured and we're not
+        // pinned to the public proxy — mirrors `binary_url`'s decision so a
+        // bearer is never sent to the proxy.
+        let use_auth =
+            self.api_token.is_some() && self.org_slug.is_some() && !self.use_public_proxy;
+        // NOTE(review): with `vendor_url` set and `use_auth` true, the request
+        // below goes to the override host through `self.client`, which
+        // presumably carries the bearer — confirm override hosts are trusted.
+        let base = vendor_url
+            .unwrap_or(&self.api_url)
+            .trim_end_matches('/')
+            .to_string();
+
+        let resp = if use_auth {
+            // `use_auth` already checked `org_slug.is_some()`, so this unwrap
+            // cannot panic.
+            let slug = self.org_slug.as_deref().unwrap();
+            let url = format!("{base}/v0/orgs/{slug}/patches/package");
+            debug_log(&format!("POST {url}"));
+            self.client
+                .post(&url)
+                .header(header::CONTENT_TYPE, "application/json")
+                .json(&body)
+                .send()
+                .await
+        } else {
+            let url = format!("{base}/patch/package");
+            debug_log(&format!("POST {url}"));
+            // Plain (no-auth) client: never leak the bearer to the proxy.
+            plain_client()
+                .post(&url)
+                .header(header::CONTENT_TYPE, "application/json")
+                .header(header::ACCEPT, "application/json")
+                .json(&body)
+                .send()
+                .await
+        };
+
+        let resp = resp.map_err(|e| ApiError::Network(format!("Network error: {e}")))?;
+        let status = resp.status();
+        if status == StatusCode::OK {
+            let parsed = resp.json::().await.map_err(|e| {
+                ApiError::Parse(format!("Failed to parse package response: {e}"))
+            })?;
+            // Results are keyed by UUID; only the requested one is returned.
+            return parsed.results.get(uuid).cloned().ok_or_else(|| {
+                ApiError::Other(format!("package response missing a result for {uuid}"))
+            });
+        }
+        if let Some(err) = classify_auth_error(status, !use_auth) {
+            return Err(err);
+        }
+        let text = resp.text().await.unwrap_or_default();
+        Err(ApiError::Other(format!(
+            "package request failed with status {}: {text}",
+            status.as_u16(),
+        )))
+    }
+
+    /// Step 2 of [`Self::fetch_vendor_package`]: GET the grant-tokenized serve
+    /// URL. The grant token in the path is the authorization, so this uses a
+    /// plain (no-auth) client.
+    async fn download_vendor_archive(&self, url: &str) -> ServeDownload {
+        // Only plain web schemes are fetched; anything else is refused up front.
+        let is_http = ["https://", "http://"]
+            .iter()
+            .any(|scheme| url.starts_with(*scheme));
+        if !is_http {
+            return ServeDownload::Failed(ApiError::Other(format!(
+                "refusing non-http(s) artifact URL `{url}`"
+            )));
+        }
+        debug_log(&format!("GET vendor package {url}"));
+
+        let request = plain_client()
+            .get(url)
+            .header(header::ACCEPT, "application/octet-stream");
+        let response = match request.send().await {
+            Ok(response) => response,
+            Err(send_err) => {
+                return ServeDownload::Failed(ApiError::Network(format!(
+                    "Network error fetching vendor package: {send_err}"
+                )))
+            }
+        };
+
+        let code = response.status();
+        // 404 and 410 are terminal misses; the caller picks between a build
+        // fallback and a hard failure.
+        if code == StatusCode::NOT_FOUND || code == StatusCode::GONE {
+            return ServeDownload::NotFound;
+        }
+        // 408 means the archive is still being built, so the caller may retry.
+        if code == StatusCode::REQUEST_TIMEOUT {
+            return ServeDownload::Pending;
+        }
+        if code != StatusCode::OK {
+            // Prefer the shared auth classification; otherwise report the raw
+            // status together with whatever body text the server sent.
+            if let Some(auth_err) = classify_auth_error(code, true) {
+                return ServeDownload::Failed(auth_err);
+            }
+            let body = response.text().await.unwrap_or_default();
+            return ServeDownload::Failed(ApiError::Other(format!(
+                "vendor package download failed with status {}: {body}",
+                code.as_u16(),
+            )));
+        }
+
+        // 200: read the body under the download size cap.
+        match read_capped(response, MAX_VENDOR_PACKAGE_BYTES).await {
+            Ok(bytes) => ServeDownload::Ok(bytes),
+            Err(cap_err) => ServeDownload::Failed(ApiError::Network(cap_err)),
+        }
+    }
+
+    /// Download a secondary artifact (e.g. the gem stub gemspec) from its
+    /// grant-tokenized serve URL. Same plain-client + cap discipline as the
+    /// tarball download; the caller verifies the bytes against the artifact's
+    /// integrity. A 404/410/408 surfaces as an error (a secondary the
+    /// reference promised should be present).
+ pub async fn download_artifact(&self, url: &str) -> Result, ApiError> { + match self.download_vendor_archive(url).await { + ServeDownload::Ok(bytes) => Ok(bytes), + ServeDownload::NotFound => { + Err(ApiError::Other(format!("artifact not found: {url}"))) + } + ServeDownload::Pending => { + Err(ApiError::Other(format!("artifact still building: {url}"))) + } + ServeDownload::Failed(e) => Err(e), + } + } } // ── Free functions ──────────────────────────────────────────────────── +/// Cap on a single prebuilt-archive download (defensive bound against a +/// runaway / hostile serve response). Generous enough for any real package. +const MAX_VENDOR_PACKAGE_BYTES: u64 = 256 * 1024 * 1024; + +/// A prebuilt vendored archive downloaded from the patch.socket.dev service, +/// together with the service-reported integrity. The bytes are **unverified** +/// here — callers must verify against `integrity_sri` (and, for golang, the +/// `h1:` dirhash) before writing/extracting. +#[derive(Debug, Clone)] +pub struct FetchedVendorPackage { + pub tarball: Vec, + /// Normalized Subresource-Integrity string, always `sha512-`. + pub integrity_sri: String, + /// Hex sha1 of the archive, when the service reported one. + pub sha1_hex: Option, + /// golang module-zip dirhash (`h1:`), when present. + pub dirhash_h1: Option, + pub size_bytes: Option, + pub content_type: Option, + /// The (possibly host-rewritten) URL the bytes were fetched from. + pub source_url: String, + /// The OTHER served artifacts (e.g. the gem path-source stub gemspec), + /// each with a host-rewritten URL + normalized sha512, for a backend to + /// download + verify lazily via [`ApiClient::download_artifact`]. + pub secondary_artifacts: Vec, +} + +/// A non-tarball served artifact reference (e.g. `gem-stub-gemspec`): its kind, +/// final download URL, and sha512 SRI. Bytes are fetched + verified on demand. 
+#[derive(Debug, Clone)] +pub struct SecondaryArtifact { + pub kind: String, + pub url: String, + /// Normalized `sha512-` of the artifact bytes. + pub integrity_sri: String, +} + +/// Outcome of [`ApiClient::fetch_vendor_package`]. +/// +/// The vendor backends map these onto the `auto`/`service`/`build` policy: +/// `Ready` → use the service archive; `Pending`/`Unavailable`/`Failed` → fall +/// back to a local build under `auto`, or hard-fail under `service`. +#[derive(Debug)] +pub enum VendorServiceOutcome { + /// Archive downloaded; integrity carried for the caller to verify. + Ready(FetchedVendorPackage), + /// The archive is still building (`pending_build` status or serve 408) — + /// retryable. + Pending, + /// A terminal miss for this input (not built, withdrawn, not found, or no + /// usable artifact). `String` is a short reason for logging. + Unavailable(String), + /// A request/transport/auth failure (401/403 grant, 5xx, network, malformed). + Failed(ApiError), +} + +/// Internal result of the step-2 archive GET. +enum ServeDownload { + Ok(Vec), + /// 404 / 410 — terminal miss. + NotFound, + /// 408 — still building, retryable. + Pending, + Failed(ApiError), +} + +/// Build a plain `reqwest::Client` carrying only the User-Agent — no +/// Authorization. Used for the public-proxy POST and the grant-tokenized serve +/// GET, where sending the Socket bearer would leak it to a third party. +fn plain_client() -> reqwest::Client { + let mut headers = HeaderMap::new(); + headers.insert( + header::USER_AGENT, + HeaderValue::from_static(USER_AGENT_VALUE), + ); + reqwest::Client::builder() + .default_headers(headers) + .build() + .expect("failed to build plain reqwest client") +} + +/// Stream a response body into memory with a hard byte cap, rejecting both an +/// over-large declared `Content-Length` and an actual stream that exceeds the +/// cap mid-flight. 
+async fn read_capped(mut resp: reqwest::Response, max: u64) -> Result, String> { + if let Some(len) = resp.content_length() { + if len > max { + return Err(format!( + "vendor package too large: declared {len} bytes > {max} cap" + )); + } + } + let mut bytes: Vec = Vec::new(); + while let Some(chunk) = resp + .chunk() + .await + .map_err(|e| format!("error reading vendor package body: {e}"))? + { + if bytes.len() as u64 + chunk.len() as u64 > max { + return Err(format!("vendor package exceeded {max}-byte cap mid-stream")); + } + bytes.extend_from_slice(&chunk); + } + Ok(bytes) +} + +/// Normalize a service-reported sha512 into SRI form (`sha512-`). +/// +/// The service persists npm SRI form, but tolerate a bare base64 digest by +/// prefixing it — `verify_sri` (the consumer) expects the `sha512-` prefix. +fn normalize_sha512_sri(value: &str) -> String { + let v = value.trim(); + if v.starts_with("sha512-") { + v.to_string() + } else { + format!("sha512-{v}") + } +} + +/// Rewrite the scheme + host (+ port) of `original` to those of `new_base`, +/// preserving `original`'s path and query. Used to redirect a server-returned +/// serve URL at a local-dev / test host (`--patch-server-url`). +fn rewrite_url_host(original: &str, new_base: &str) -> Result { + let orig = reqwest::Url::parse(original) + .map_err(|e| ApiError::Other(format!("malformed serve URL `{original}`: {e}")))?; + let mut base = reqwest::Url::parse(new_base) + .map_err(|e| ApiError::Other(format!("malformed --patch-server-url `{new_base}`: {e}")))?; + base.set_path(orig.path()); + base.set_query(orig.query()); + Ok(base.to_string()) +} + /// Explicit overrides for environment-based API client construction. 
/// /// Each `Some(value)` wins over the corresponding env var; `None` falls @@ -2011,3 +2402,336 @@ mod tests { assert_eq!(info.title, "A summary"); } } + +#[cfg(test)] +mod vendor_package_tests { + use super::*; + use serde_json::json; + use wiremock::matchers::{body_partial_json, method, path}; + use wiremock::{Match, Mock, MockServer, Request, ResponseTemplate}; + + const UUID: &str = "11111111-1111-1111-1111-111111111111"; + const SERVE_PATH: &str = "/patch/npm/lodash/4.17.21/tok/uuid/lodash-4.17.21.tgz"; + const TARBALL: &[u8] = b"prebuilt deterministic tarball bytes"; + + /// Matches a request that carries NO `Authorization` header — proves the + /// proxy POST and the grant-tokenized serve GET never leak the bearer. + struct NoAuthorizationHeader; + impl Match for NoAuthorizationHeader { + fn matches(&self, request: &Request) -> bool { + !request.headers.contains_key("authorization") + } + } + + fn auth_client(uri: String) -> ApiClient { + ApiClient::new(ApiClientOptions { + api_url: uri, + api_token: Some("sktsec_token_placeholder_value_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + }) + } + + fn proxy_client(uri: String) -> ApiClient { + ApiClient::new(ApiClientOptions { + api_url: uri, + api_token: None, + use_public_proxy: true, + org_slug: None, + }) + } + + /// A `granted` body whose tarball artifact points at `serve_url`. 
+ fn granted_body(serve_url: &str, sha512: &str) -> serde_json::Value { + json!({ + "results": { + UUID: { + "status": "granted", + "url": serve_url, + "purl": "pkg:npm/lodash@4.17.21", + "artifacts": [{ + "kind": "tarball", + "url": serve_url, + "contentType": "application/gzip", + "sizeBytes": TARBALL.len(), + "integrity": { "sha512": sha512, "sha1": "deadbeef" } + }] + } + } + }) + } + + async fn mount_status(server: &MockServer, status: &str) { + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { "status": status, "url": null, "artifacts": [] } } + }))) + .expect(1) + .mount(server) + .await; + } + + #[tokio::test] + async fn granted_authenticated_downloads_and_returns_bytes() { + let server = MockServer::start().await; + let serve_url = format!("{}{SERVE_PATH}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .and(body_partial_json(json!({ "uuids": [UUID] }))) + .respond_with(ResponseTemplate::new(200).set_body_json(granted_body(&serve_url, "sha512-ABC123=="))) + .expect(1) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .and(NoAuthorizationHeader) + .respond_with(ResponseTemplate::new(200).set_body_bytes(TARBALL.to_vec())) + .expect(1) + .mount(&server) + .await; + + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + match outcome { + VendorServiceOutcome::Ready(pkg) => { + assert_eq!(pkg.tarball, TARBALL); + assert_eq!(pkg.integrity_sri, "sha512-ABC123=="); + assert_eq!(pkg.sha1_hex.as_deref(), Some("deadbeef")); + assert_eq!(pkg.source_url, serve_url); + } + other => panic!("expected Ready, got {other:?}"), + } + } + + #[tokio::test] + async fn proxy_path_posts_to_patch_route_without_auth_and_forces_free_only() { + let server = MockServer::start().await; + let serve_url = format!("{}{SERVE_PATH}", server.uri()); + 
Mock::given(method("POST")) + .and(path("/patch/package")) + .and(NoAuthorizationHeader) + .and(body_partial_json(json!({ "uuids": [UUID], "freeOnly": true }))) + .respond_with(ResponseTemplate::new(200).set_body_json(granted_body(&serve_url, "sha512-ZZ=="))) + .expect(1) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(TARBALL.to_vec())) + .expect(1) + .mount(&server) + .await; + + // free_only=true (public-proxy contract). + let outcome = proxy_client(server.uri()) + .fetch_vendor_package(UUID, true, None, None) + .await; + assert!(matches!(outcome, VendorServiceOutcome::Ready(_))); + } + + #[tokio::test] + async fn bare_sha512_is_normalized_to_sri() { + let server = MockServer::start().await; + let serve_url = format!("{}{SERVE_PATH}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(granted_body(&serve_url, "BAREB64=="))) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(TARBALL.to_vec())) + .mount(&server) + .await; + + match auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await + { + VendorServiceOutcome::Ready(pkg) => assert_eq!(pkg.integrity_sri, "sha512-BAREB64=="), + other => panic!("expected Ready, got {other:?}"), + } + } + + #[tokio::test] + async fn patch_server_url_rewrites_the_download_host() { + let server = MockServer::start().await; + // The server bakes an UNREACHABLE host into the URL; --patch-server-url + // redirects the GET at the mock while preserving the path. 
+ let baked = format!("https://patch.socket.dev{SERVE_PATH}"); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(granted_body(&baked, "sha512-AA=="))) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(TARBALL.to_vec())) + .expect(1) + .mount(&server) + .await; + + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, Some(&server.uri())) + .await; + match outcome { + VendorServiceOutcome::Ready(pkg) => { + assert!(pkg.source_url.starts_with(&server.uri()), "host rewritten"); + assert!(pkg.source_url.ends_with(SERVE_PATH), "path preserved"); + } + other => panic!("expected Ready, got {other:?}"), + } + } + + #[tokio::test] + async fn pending_build_status_skips_download_and_is_pending() { + let server = MockServer::start().await; + mount_status(&server, "pending_build").await; + // No GET mock mounted: a download attempt would 404 the server's + // catch-all (no mock) — but more importantly we never get there. 
+ let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + assert!(matches!(outcome, VendorServiceOutcome::Pending)); + } + + #[tokio::test] + async fn serve_408_is_pending() { + let server = MockServer::start().await; + let serve_url = format!("{}{SERVE_PATH}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(granted_body(&serve_url, "sha512-AA=="))) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .respond_with(ResponseTemplate::new(408)) + .mount(&server) + .await; + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + assert!(matches!(outcome, VendorServiceOutcome::Pending)); + } + + #[tokio::test] + async fn terminal_statuses_are_unavailable() { + for status in ["build_failed", "withdrawn", "not_found"] { + let server = MockServer::start().await; + mount_status(&server, status).await; + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + assert!( + matches!(outcome, VendorServiceOutcome::Unavailable(_)), + "status {status} must be Unavailable", + ); + } + } + + #[tokio::test] + async fn forbidden_status_is_failed() { + let server = MockServer::start().await; + mount_status(&server, "forbidden").await; + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + assert!(matches!( + outcome, + VendorServiceOutcome::Failed(ApiError::Forbidden(_)) + )); + } + + #[tokio::test] + async fn serve_404_and_403_and_5xx_map_correctly() { + // 404 → Unavailable + for (code, expect_failed) in [(404u16, false), (410, false), (403, true), (503, true)] { + let server = MockServer::start().await; + let serve_url = format!("{}{SERVE_PATH}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with( + 
ResponseTemplate::new(200).set_body_json(granted_body(&serve_url, "sha512-AA==")), + ) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .respond_with(ResponseTemplate::new(code)) + .mount(&server) + .await; + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + if expect_failed { + assert!( + matches!(outcome, VendorServiceOutcome::Failed(_)), + "serve {code} must be Failed", + ); + } else { + assert!( + matches!(outcome, VendorServiceOutcome::Unavailable(_)), + "serve {code} must be Unavailable", + ); + } + } + } + + #[tokio::test] + async fn no_tarball_artifact_is_unavailable() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { + "status": "granted", + "url": null, + "artifacts": [{ "kind": "yarn-berry-zip", "url": "https://x/y.zip", + "integrity": { "yarnBerry10c0": "10c0/abc" } }] + }} + }))) + .mount(&server) + .await; + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + assert!(matches!(outcome, VendorServiceOutcome::Unavailable(_))); + } + + #[tokio::test] + async fn tarball_without_sha512_is_unavailable() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { + "status": "granted", + "url": "https://x/y.tgz", + "artifacts": [{ "kind": "tarball", "url": "https://x/y.tgz", + "integrity": { "sha1": "deadbeef" } }] + }} + }))) + .mount(&server) + .await; + let outcome = auth_client(server.uri()) + .fetch_vendor_package(UUID, false, None, None) + .await; + assert!(matches!(outcome, VendorServiceOutcome::Unavailable(_))); + } + + #[tokio::test] + async fn invalid_uuid_is_failed_without_network() { + // No server: an early 
UUID-shape rejection must not make any request. + let client = auth_client("http://127.0.0.1:1".into()); + let outcome = client + .fetch_vendor_package("not-a-uuid", false, None, None) + .await; + assert!(matches!( + outcome, + VendorServiceOutcome::Failed(ApiError::InvalidHash(_)) + )); + } +} diff --git a/crates/socket-patch-core/src/api/types.rs b/crates/socket-patch-core/src/api/types.rs index f6bffb1..2ecf2e2 100644 --- a/crates/socket-patch-core/src/api/types.rs +++ b/crates/socket-patch-core/src/api/types.rs @@ -95,6 +95,79 @@ pub struct BatchSearchResponse { pub can_access_paid_patches: bool, } +/// Request body for the package-vendor endpoint: `POST +/// /v0/orgs/{slug}/patches/package` (authenticated) and `POST /patch/package` +/// (public proxy). Resolves published-patch UUIDs into prebuilt vendored-archive +/// download URLs + integrity. The public proxy forces `free_only`. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct PackageVendorRequest { + pub uuids: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub free_only: Option, +} + +/// Response from the package-vendor endpoint: one result per requested UUID, +/// keyed by the UUID string. +#[derive(Debug, Clone, Deserialize)] +pub struct PackageVendorResponse { + pub results: HashMap, +} + +/// One package-vendor result. `status` is the discriminator; `url` / `purl` / +/// `artifacts` are populated only for `granted` / `reused`. +/// +/// `status` values: `granted` | `reused` | `pending_build` | `build_failed` +/// | `withdrawn` | `forbidden` | `not_found`. 
+#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PackageVendorResult { + pub status: String, + #[serde(default)] + pub url: Option, + #[serde(default)] + pub purl: Option, + #[serde(default)] + pub artifacts: Option>, +} + +/// One served artifact: the native tarball (`kind: "tarball"`), or a +/// second artifact — npm's yarn-berry cache zip (`kind: "yarn-berry-zip"`) or +/// gem's path-source stub gemspec (`kind: "gem-stub-gemspec"`). `url` is null +/// only when the artifact isn't stored yet (e.g. an unbuilt berry zip). +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PackageVendorArtifact { + pub kind: String, + #[serde(default)] + pub url: Option, + #[serde(default)] + pub content_type: Option, + #[serde(default)] + pub size_bytes: Option, + #[serde(default)] + pub integrity: PackageVendorIntegrity, +} + +/// Per-artifact integrity hashes. Every ecosystem's tarball populates `sha512` +/// (npm SRI form `sha512-`) + `sha1` + `md5`; golang additionally +/// `dirhash_h1` (`h1:`); the npm yarn-berry zip carries only +/// `yarn_berry10c0` (`10c0/`). No ecosystem exposes a plain sha256. +#[derive(Debug, Clone, Default, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PackageVendorIntegrity { + #[serde(default)] + pub sha512: Option, + #[serde(default)] + pub sha1: Option, + #[serde(default)] + pub md5: Option, + #[serde(default)] + pub dirhash_h1: Option, + #[serde(default)] + pub yarn_berry10c0: Option, +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/socket-patch-core/src/patch/go_redirect.rs b/crates/socket-patch-core/src/patch/go_redirect.rs index 63a43f2..976defc 100644 --- a/crates/socket-patch-core/src/patch/go_redirect.rs +++ b/crates/socket-patch-core/src/patch/go_redirect.rs @@ -528,7 +528,7 @@ async fn redirect_in_sync( /// crash / power loss / `ENOSPC` mid-write would otherwise commit a torn or /// empty `go.mod`. 
A reader (a concurrent `go build`, or the file landing in a /// commit) then only ever sees the complete file, never a half-written one. -async fn ensure_module_go_mod(copy_dir: &Path, module: &str) -> std::io::Result<()> { +pub(crate) async fn ensure_module_go_mod(copy_dir: &Path, module: &str) -> std::io::Result<()> { let go_mod = copy_dir.join("go.mod"); if tokio::fs::metadata(&go_mod).await.is_ok() { return Ok(()); diff --git a/crates/socket-patch-core/src/patch/vendor/bun_lock.rs b/crates/socket-patch-core/src/patch/vendor/bun_lock.rs index f35a2c2..9019acb 100644 --- a/crates/socket-patch-core/src/patch/vendor/bun_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/bun_lock.rs @@ -74,6 +74,7 @@ pub async fn vendor_bun( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&super::VendorServiceConfig>, ) -> VendorOutcome { let mut warnings: Vec = Vec::new(); @@ -137,6 +138,7 @@ pub async fn vendor_bun( dry_run, force, &mut warnings, + service, ) .await { @@ -853,6 +855,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } diff --git a/crates/socket-patch-core/src/patch/vendor/cargo.rs b/crates/socket-patch-core/src/patch/vendor/cargo.rs index 5e4eeea..757370e 100644 --- a/crates/socket-patch-core/src/patch/vendor/cargo.rs +++ b/crates/socket-patch-core/src/patch/vendor/cargo.rs @@ -28,11 +28,13 @@ use crate::utils::purl::{parse_cargo_purl, strip_purl_qualifiers}; use super::cargo_config::{self, LEGACY_CARGO_PATCHES_DIR}; use super::cargo_lock::{self, LockEditError, LockEntryOriginal}; use super::path::vendor_uuid_dir_rel; +use super::registry_fetch::extract_tgz; +use super::service_fetch::{fetch_verified_archive, ServiceArtifact}; use super::state::{ write_marker, CargoLockOriginal, VendorArtifact, VendorEntry, VendorMarker, WiringAction, WiringRecord, }; -use super::{RevertOutcome, VendorOutcome, VendorWarning}; +use super::{RevertOutcome, VendorOutcome, VendorServiceConfig, VendorWarning}; /// True if a crate is vendored 
under `/vendor/` (in either the /// `-/` or bare `/` layout the cargo crawler probes). A @@ -144,6 +146,99 @@ fn done( } } +/// Outcome of attempting to materialise the cargo copy from the patch service. +enum CargoServiceCopy { + /// The prebuilt crate was extracted into `copy_dir`. + Used, + /// Bubble this terminal outcome (boxed — `VendorOutcome` is large). + HardFail(Box), + /// Fall back to copying + patching the pristine source. + FallBack, +} + +/// Download the prebuilt `.crate`, integrity-verify it, and extract it into +/// `copy_dir` (a path-dep copy must carry no `.cargo-checksum.json`). Maps each +/// service outcome onto the `auto` / `service` fallback policy. The extracted +/// crate IS the patched package the converter built, so it needs no pristine +/// source — which is the point of the service path. +async fn cargo_service_copy( + service: Option<&VendorServiceConfig>, + record: &PatchRecord, + name: &str, + copy_dir: &Path, + uuid_dir: &Path, + warnings: &mut Vec, +) -> CargoServiceCopy { + let Some(cfg) = service else { + return CargoServiceCopy::FallBack; + }; + if !cfg.service_enabled() { + return CargoServiceCopy::FallBack; + } + fn hard(code: &'static str, detail: String) -> CargoServiceCopy { + CargoServiceCopy::HardFail(Box::new(VendorOutcome::Refused { code, detail })) + } + let miss = |warnings: &mut Vec, code: &'static str, reason: String| { + if cfg.source.requires_service() { + hard("vendor_prebuilt_required", reason) + } else { + warnings.push(VendorWarning::new(code, format!("{reason}; building locally instead"))); + CargoServiceCopy::FallBack + } + }; + match fetch_verified_archive(cfg, &record.uuid, name).await { + ServiceArtifact::Ready(archive) => { + // Clean copy dir, then extract the `.crate` (tar.gz; strip its + // single `{name}-{version}/` top-level dir) into it. 
+ let _ = remove_tree(copy_dir).await; + if let Err(e) = tokio::fs::create_dir_all(copy_dir).await { + return hard( + "vendor_prebuilt_write_failed", + format!("cannot create {}: {e}", copy_dir.display()), + ); + } + if let Err(e) = extract_tgz(&archive.bytes, copy_dir) { + let _ = remove_tree(uuid_dir).await; + return hard( + "vendor_prebuilt_extract_failed", + format!("cannot extract the prebuilt crate: {e}"), + ); + } + let _ = tokio::fs::remove_file(copy_dir.join(".cargo-checksum.json")).await; + warnings.push(VendorWarning::new( + "vendor_prebuilt_downloaded", + format!("vendored {name} from the patch service ({})", archive.source_url), + )); + CargoServiceCopy::Used + } + ServiceArtifact::IntegrityMismatch(reason) => miss( + warnings, + "vendor_prebuilt_integrity_mismatch", + format!("prebuilt crate failed integrity ({reason})"), + ), + ServiceArtifact::Pending => miss( + warnings, + "vendor_prebuilt_pending", + "prebuilt crate is still building".to_string(), + ), + ServiceArtifact::Unavailable(reason) => { + if cfg.source.requires_service() { + hard( + "vendor_prebuilt_required", + format!("prebuilt crate unavailable: {reason}"), + ) + } else { + CargoServiceCopy::FallBack + } + } + ServiceArtifact::Failed(reason) => miss( + warnings, + "vendor_prebuilt_unavailable", + format!("patch service request failed ({reason})"), + ), + } +} + /// Vendor one cargo crate: patched copy + `[patch.crates-io]` entry + /// `Cargo.lock` surgery + marker, returning the ledger entry to persist. 
/// @@ -165,6 +260,7 @@ pub async fn vendor_cargo_crate( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&VendorServiceConfig>, ) -> VendorOutcome { // ── coordinate validation (fail-closed, before any disk access) ────── let Some((name, version)) = parse_cargo_purl(purl) else { @@ -357,60 +453,82 @@ pub async fn vendor_cargo_crate( } // ── materialise the patched copy ────────────────────────────────────── - // Skip any `.cargo-checksum.json`: cargo 1.93 registry/src dirs no longer - // carry one (spike surprise), but older layouts do and a path-dep copy - // must never include it (its presence would re-enable checksum fixups). - if let Err(e) = fresh_copy(pristine_src, ©_dir, Some(".cargo-checksum.json")).await { - // Clear the whole uuid dir, not just the copy: a partial copy (or an - // empty `/` husk) under .socket/vendor/ would be misjudged by - // verify/sweep. - let _ = remove_tree(&uuid_dir).await; - return done( - synthesized_result( + // Prefer the prebuilt `.crate` from the patch service (download + extract, + // no pristine source needed); else copy the pristine source and patch it. + // Either way a path-dep copy must never carry a `.cargo-checksum.json` + // (cargo 1.93 src dirs no longer have one, but older layouts do and its + // presence would re-enable checksum fixups). + let mut warnings: Vec = Vec::new(); + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return VendorOutcome::Refused { + code: "vendor_service_offline_conflict", + detail: "--vendor-source=service needs the network but --offline is set" + .to_string(), + }; + } + } + let mut result = match cargo_service_copy(service, record, name, ©_dir, &uuid_dir, &mut warnings) + .await + { + CargoServiceCopy::Used => { + // The service crate is the patched package; trust its verified + // integrity (every file reads as AlreadyPatched). 
+ let verified = record.files.keys().map(|f| already_patched_verify(f)).collect(); + synthesized_result(purl, ©_dir, verified, true, None) + } + CargoServiceCopy::HardFail(outcome) => return *outcome, + CargoServiceCopy::FallBack => { + if let Err(e) = fresh_copy(pristine_src, ©_dir, Some(".cargo-checksum.json")).await { + // Clear the whole uuid dir, not just the copy: a partial copy + // (or an empty `/` husk) under .socket/vendor/ would be + // misjudged by verify/sweep. + let _ = remove_tree(&uuid_dir).await; + return done( + synthesized_result( + purl, + ©_dir, + Vec::new(), + false, + Some(format!("failed to copy pristine source: {e}")), + ), + None, + warnings, + ); + } + // Delegate to the hardened pipeline (vendor auto-force policy — + // see `force_apply_staged`), pointed at the copy. + let mut result = super::force_apply_staged( purl, ©_dir, - Vec::new(), + record, + sources, false, - Some(format!("failed to copy pristine source: {e}")), - ), - None, - Vec::new(), - ); - } - - // Delegate to the hardened pipeline (vendor auto-force policy — see - // `force_apply_staged`), pointed at the copy. - let mut warnings: Vec = Vec::new(); - let mut result = super::force_apply_staged( - purl, - ©_dir, - record, - sources, - false, - force, - name, - version, - &mut warnings, - ) - .await; - result.package_path = copy_dir.display().to_string(); - - if !result.success { - // Don't leave a half-built copy (or an empty uuid husk) that - // verify/sweep would misjudge. - let _ = remove_tree(&uuid_dir).await; - return done(result, None, warnings); - } - - // A path-dep copy must never carry a checksum sidecar. The fresh copy - // excluded it; enforce the invariant defensively in case the patch itself - // recreated the file. 
- let _ = tokio::fs::remove_file(copy_dir.join(".cargo-checksum.json")).await; - debug_assert!( - result.sidecar.is_none(), - "vendor copy must not produce a cargo sidecar" - ); - result.sidecar = None; + force, + name, + version, + &mut warnings, + ) + .await; + result.package_path = copy_dir.display().to_string(); + if !result.success { + // Don't leave a half-built copy (or an empty uuid husk) that + // verify/sweep would misjudge. + let _ = remove_tree(&uuid_dir).await; + return done(result, None, warnings); + } + // A path-dep copy must never carry a checksum sidecar. The fresh + // copy excluded it; enforce defensively in case the patch recreated + // the file. + let _ = tokio::fs::remove_file(copy_dir.join(".cargo-checksum.json")).await; + debug_assert!( + result.sidecar.is_none(), + "vendor copy must not produce a cargo sidecar" + ); + result.sidecar = None; + result + } + }; // ── wire the config entry ───────────────────────────────────────────── if let Err(e) = cargo_config::ensure_patch_entry(project_root, name, ©_rel, false).await { @@ -750,6 +868,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -1360,4 +1479,228 @@ mod tests { lock_body() ); } + + // ─────────────── service-download path (Tier B: cargo) ─────────────── + // + // cargo vendors a patched source DIRECTORY, so the service path downloads + // the prebuilt `.crate`, verifies it, and extracts it into the copy dir. + // Both the service path AND the local-build fallback are exercised. 
+ + use crate::api::client::{ApiClient, ApiClientOptions}; + use crate::patch::vendor::{VendorServiceConfig, VendorSource}; + + fn sri_sha512(bytes: &[u8]) -> String { + use base64::Engine as _; + use sha2::{Digest as _, Sha512}; + format!( + "sha512-{}", + base64::engine::general_purpose::STANDARD.encode(Sha512::digest(bytes)) + ) + } + + fn cargo_service_cfg(uri: &str, source: VendorSource, offline: bool) -> VendorServiceConfig { + VendorServiceConfig { + source, + client: Some(ApiClient::new(ApiClientOptions { + api_url: uri.to_string(), + api_token: Some("sktsec_placeholder_value_for_tests_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + })), + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline, + } + } + + /// Build a `.crate` (tar.gz with a single `{prefix}/` top-level dir). + fn make_crate_tgz(prefix: &str, files: &[(&str, &[u8])]) -> Vec { + use std::io::Write as _; + let mut builder = tar::Builder::new(Vec::new()); + for (rel, content) in files { + let mut header = tar::Header::new_gnu(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder + .append_data(&mut header, format!("{prefix}/{rel}"), *content) + .unwrap(); + } + let tar_bytes = builder.into_inner().unwrap(); + let mut enc = + flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default()); + enc.write_all(&tar_bytes).unwrap(); + enc.finish().unwrap() + } + + async fn mount_cargo_granted(server: &wiremock::MockServer, sha512: &str, crate_bytes: &[u8]) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + let serve_path = format!("/patch/cargo/cfg-if/1.0.4/tok/{UUID}/cfg-if-1.0.4.crate"); + let serve_url = format!("{}{serve_path}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { + "status": "granted", + "url": 
serve_url, + "purl": PURL, + "artifacts": [{ "kind": "tarball", "url": serve_url, + "integrity": { "sha512": sha512 } }] + }} + }))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path(serve_path)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(crate_bytes.to_vec())) + .mount(server) + .await; + } + + async fn mount_cargo_status(server: &wiremock::MockServer, status: &str) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { "status": status, "url": null, "artifacts": [] } } + }))) + .mount(server) + .await; + } + + fn copy_lib(root: &Path) -> PathBuf { + root.join(format!(".socket/vendor/cargo/{UUID}/cfg-if-1.0.4/src/lib.rs")) + } + + /// Service success: the prebuilt crate is extracted into the copy dir (with + /// the patched content, no checksum sidecar), the config is wired, and a + /// `vendor_prebuilt_downloaded` advisory is emitted — WITHOUT touching the + /// pristine source (a deliberately-missing path). + #[tokio::test] + async fn service_success_extracts_crate_and_wires_config() { + let (dir, blobs, _pristine, record) = fixture().await; + let root = dir.path(); + let crate_tgz = make_crate_tgz( + "cfg-if-1.0.4", + &[ + ("src/lib.rs", PATCHED), + ("Cargo.toml", b"[package]\nname = \"cfg-if\"\nversion = \"1.0.4\"\n"), + (".cargo-checksum.json", b"{\"files\":{}}"), + ], + ); + let sri = sri_sha512(&crate_tgz); + let server = wiremock::MockServer::start().await; + mount_cargo_granted(&server, &sri, &crate_tgz).await; + let sources = PatchSources::blobs_only(&blobs); + + // A deliberately-missing pristine source: the service path must not need it. 
+ let bogus_pristine = root.join("no-such-pristine"); + let outcome = vendor_cargo_crate( + PURL, + &bogus_pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&cargo_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + let (result, entry, warnings) = expect_done(outcome); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + assert_eq!(tokio::fs::read(copy_lib(root)).await.unwrap(), PATCHED); + assert!( + !root + .join(format!(".socket/vendor/cargo/{UUID}/cfg-if-1.0.4/.cargo-checksum.json")) + .exists(), + "path-dep copy must not carry a checksum sidecar" + ); + let cfg = tokio::fs::read_to_string(root.join(".cargo/config.toml")) + .await + .unwrap(); + assert!(cfg.contains("[patch.crates-io]") && cfg.contains("cfg-if"), "{cfg}"); + assert!(warnings.iter().any(|w| w.code == "vendor_prebuilt_downloaded")); + } + + /// `service` mode + integrity mismatch hard-fails, nothing extracted. + #[tokio::test] + async fn service_integrity_mismatch_service_mode_hard_fails() { + let (dir, blobs, pristine, record) = fixture().await; + let root = dir.path(); + let crate_tgz = make_crate_tgz("cfg-if-1.0.4", &[("src/lib.rs", PATCHED)]); + let wrong = sri_sha512(b"different bytes"); + let server = wiremock::MockServer::start().await; + mount_cargo_granted(&server, &wrong, &crate_tgz).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_cargo_crate( + PURL, + &pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&cargo_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + expect_refused(outcome, "vendor_prebuilt_required"); + assert!(!root.join(format!(".socket/vendor/cargo/{UUID}")).exists()); + } + + /// `auto` + a not-built service status falls back to the local build (which + /// copies the pristine source + patches it). 
+ #[tokio::test] + async fn service_unavailable_auto_falls_back_to_build() { + let (dir, blobs, pristine, record) = fixture().await; + let root = dir.path(); + let server = wiremock::MockServer::start().await; + mount_cargo_status(&server, "not_found").await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_cargo_crate( + PURL, + &pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&cargo_service_cfg(&server.uri(), VendorSource::Auto, false)), + ) + .await; + let (result, entry, _) = expect_done(outcome); + assert!(result.success, "auto must fall back to the local build: {:?}", result.error); + assert!(entry.is_some()); + // The locally-built copy has the patched content. + assert_eq!(tokio::fs::read(copy_lib(root)).await.unwrap(), PATCHED); + } + + /// `--offline` + `--vendor-source=service` refuses without any network. + #[tokio::test] + async fn offline_service_mode_refuses() { + let (dir, blobs, pristine, record) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + let outcome = vendor_cargo_crate( + PURL, + &pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&cargo_service_cfg("http://127.0.0.1:1", VendorSource::Service, true)), + ) + .await; + expect_refused(outcome, "vendor_service_offline_conflict"); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/composer_lock.rs b/crates/socket-patch-core/src/patch/vendor/composer_lock.rs index 30a0334..a007891 100644 --- a/crates/socket-patch-core/src/patch/vendor/composer_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/composer_lock.rs @@ -45,10 +45,12 @@ use crate::utils::fs::atomic_write_bytes; use crate::utils::purl::{build_composer_purl, parse_composer_purl}; use super::path::{parse_vendor_path, vendor_uuid_dir_rel}; +use super::registry_fetch::extract_zip; +use super::service_fetch::{fetch_verified_archive, ServiceArtifact}; use super::state::{ 
write_marker, VendorArtifact, VendorEntry, VendorMarker, WiringAction, WiringRecord, }; -use super::{RevertOutcome, VendorOutcome, VendorWarning}; +use super::{RevertOutcome, VendorOutcome, VendorServiceConfig, VendorWarning}; /// Project-relative lockfile this backend wires. const COMPOSER_LOCK: &str = "composer.lock"; @@ -95,6 +97,7 @@ pub async fn vendor_composer( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&VendorServiceConfig>, ) -> VendorOutcome { // ── coordinates ────────────────────────────────────────────────────── let Some(((vendor, name), version)) = parse_composer_purl(purl) else { @@ -272,42 +275,72 @@ pub async fn vendor_composer( } // ── copy + patch (wiring last) ─────────────────────────────────────── - if let Err(e) = fresh_copy(installed_dir, ©_dir, None).await { - return VendorOutcome::Done { - result: synthesized_result( - purl, - ©_dir, - Vec::new(), - false, - Some(format!("failed to copy installed package: {e}")), - ), - entry: None, - warnings: Vec::new(), - }; - } + // Prefer the prebuilt dist zip from the patch service (download + extract, + // no installed package needed); else copy the installed package and patch + // it. let mut warnings: Vec = Vec::new(); - let mut result = super::force_apply_staged( - purl, - ©_dir, + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return refused( + "vendor_service_offline_conflict", + "--vendor-source=service needs the network but --offline is set", + ); + } + } + let mut result = match composer_service_copy( + service, record, - sources, - false, - force, &pkg, - version, + ©_dir, + &uuid_dir, &mut warnings, ) - .await; - result.package_path = copy_dir.display().to_string(); - if !result.success { - // Don't leave a half-built copy; the lock was never touched. 
- let _ = remove_tree(&uuid_dir).await; - return VendorOutcome::Done { - result, - entry: None, - warnings, - }; - } + .await + { + ComposerServiceCopy::Used => { + let verified = record.files.keys().map(|f| already_patched_verify(f)).collect(); + synthesized_result(purl, ©_dir, verified, true, None) + } + ComposerServiceCopy::HardFail(outcome) => return *outcome, + ComposerServiceCopy::FallBack => { + if let Err(e) = fresh_copy(installed_dir, ©_dir, None).await { + return VendorOutcome::Done { + result: synthesized_result( + purl, + ©_dir, + Vec::new(), + false, + Some(format!("failed to copy installed package: {e}")), + ), + entry: None, + warnings, + }; + } + let mut result = super::force_apply_staged( + purl, + ©_dir, + record, + sources, + false, + force, + &pkg, + version, + &mut warnings, + ) + .await; + result.package_path = copy_dir.display().to_string(); + if !result.success { + // Don't leave a half-built copy; the lock was never touched. + let _ = remove_tree(&uuid_dir).await; + return VendorOutcome::Done { + result, + entry: None, + warnings, + }; + } + result + } + }; // ── lock rewrite ───────────────────────────────────────────────────── let original_entry = lock[section][idx].clone(); @@ -484,6 +517,96 @@ pub async fn revert_composer( // ── helpers ────────────────────────────────────────────────────────────────── +/// Outcome of attempting to materialise the composer copy from the patch service. +enum ComposerServiceCopy { + /// The prebuilt dist zip was extracted into `copy_dir`. + Used, + /// Bubble this terminal outcome (boxed — `VendorOutcome` is large). + HardFail(Box), + /// Fall back to copying + patching the installed package. + FallBack, +} + +/// Download the prebuilt dist zip, integrity-verify it, and extract it into +/// `copy_dir` (dropping the zip's variable top-level dir). Maps each service +/// outcome onto the `auto` / `service` fallback policy. The extracted zip IS +/// the patched package, so it needs no installed copy. 
+async fn composer_service_copy( + service: Option<&VendorServiceConfig>, + record: &PatchRecord, + pkg: &str, + copy_dir: &Path, + uuid_dir: &Path, + warnings: &mut Vec, +) -> ComposerServiceCopy { + let Some(cfg) = service else { + return ComposerServiceCopy::FallBack; + }; + if !cfg.service_enabled() || record.files.is_empty() { + return ComposerServiceCopy::FallBack; + } + fn hard(code: &'static str, detail: String) -> ComposerServiceCopy { + ComposerServiceCopy::HardFail(Box::new(VendorOutcome::Refused { code, detail })) + } + let miss = |warnings: &mut Vec, code: &'static str, reason: String| { + if cfg.source.requires_service() { + hard("vendor_prebuilt_required", reason) + } else { + warnings.push(VendorWarning::new(code, format!("{reason}; building locally instead"))); + ComposerServiceCopy::FallBack + } + }; + match fetch_verified_archive(cfg, &record.uuid, pkg).await { + ServiceArtifact::Ready(archive) => { + let _ = remove_tree(copy_dir).await; + if let Err(e) = tokio::fs::create_dir_all(copy_dir).await { + return hard( + "vendor_prebuilt_write_failed", + format!("cannot create {}: {e}", copy_dir.display()), + ); + } + // composer dist zips carry a single variable top-level dir. 
+ if let Err(e) = extract_zip(&archive.bytes, copy_dir, /*strip_first=*/ true) { + let _ = remove_tree(uuid_dir).await; + return hard( + "vendor_prebuilt_extract_failed", + format!("cannot extract the prebuilt dist zip: {e}"), + ); + } + warnings.push(VendorWarning::new( + "vendor_prebuilt_downloaded", + format!("vendored {pkg} from the patch service ({})", archive.source_url), + )); + ComposerServiceCopy::Used + } + ServiceArtifact::IntegrityMismatch(reason) => miss( + warnings, + "vendor_prebuilt_integrity_mismatch", + format!("prebuilt dist zip failed integrity ({reason})"), + ), + ServiceArtifact::Pending => miss( + warnings, + "vendor_prebuilt_pending", + "prebuilt dist zip is still building".to_string(), + ), + ServiceArtifact::Unavailable(reason) => { + if cfg.source.requires_service() { + hard( + "vendor_prebuilt_required", + format!("prebuilt dist zip unavailable: {reason}"), + ) + } else { + ComposerServiceCopy::FallBack + } + } + ServiceArtifact::Failed(reason) => miss( + warnings, + "vendor_prebuilt_unavailable", + format!("patch service request failed ({reason})"), + ), + } +} + fn refused(code: &'static str, detail: impl Into) -> VendorOutcome { VendorOutcome::Refused { code, @@ -853,6 +976,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -1316,4 +1440,222 @@ mod tests { "uuid dir still removed" ); } + + // ─────────────── service-download path (Tier B: composer) ─────────────── + + use crate::api::client::{ApiClient, ApiClientOptions}; + use crate::patch::vendor::{VendorServiceConfig, VendorSource}; + + fn sri_sha512(bytes: &[u8]) -> String { + use base64::Engine as _; + use sha2::{Digest as _, Sha512}; + format!( + "sha512-{}", + base64::engine::general_purpose::STANDARD.encode(Sha512::digest(bytes)) + ) + } + + fn composer_service_cfg(uri: &str, source: VendorSource, offline: bool) -> VendorServiceConfig { + VendorServiceConfig { + source, + client: Some(ApiClient::new(ApiClientOptions { + api_url: uri.to_string(), + 
api_token: Some("sktsec_placeholder_value_for_tests_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + })), + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline, + } + } + + /// Build a composer dist zip with a single variable top-level dir. + fn make_dist_zip(top: &str, files: &[(&str, &[u8])]) -> Vec { + use std::io::Write as _; + let mut cursor = std::io::Cursor::new(Vec::new()); + { + let mut zw = zip::ZipWriter::new(&mut cursor); + let opts = zip::write::SimpleFileOptions::default(); + for (rel, content) in files { + zw.start_file(format!("{top}/{rel}"), opts).unwrap(); + zw.write_all(content).unwrap(); + } + zw.finish().unwrap(); + } + cursor.into_inner() + } + + async fn mount_composer_granted(server: &wiremock::MockServer, sha512: &str, zip_bytes: &[u8]) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + let serve_path = format!("/patch/composer/psr/log/3.0.2/tok/{UUID}/psr-log-3.0.2.zip"); + let serve_url = format!("{}{serve_path}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { + "status": "granted", + "url": serve_url, + "purl": PURL, + "artifacts": [{ "kind": "tarball", "url": serve_url, + "integrity": { "sha512": sha512 } }] + }} + }))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path(serve_path)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(zip_bytes.to_vec())) + .mount(server) + .await; + } + + async fn mount_composer_status(server: &wiremock::MockServer, status: &str) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { "status": status, "url": null, "artifacts": [] } } + }))) + .mount(server) + .await; + 
} + + async fn vendor_with_service( + root: &Path, + blobs: &Path, + installed: &Path, + record: &PatchRecord, + cfg: &VendorServiceConfig, + ) -> VendorOutcome { + let sources = PatchSources::blobs_only(blobs); + vendor_composer( + PURL, + installed, + root, + record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(cfg), + ) + .await + } + + /// Service success: the prebuilt dist zip is extracted into the copy dir + /// (patched bytes), the lock is rewired, and a `vendor_prebuilt_downloaded` + /// advisory is emitted — WITHOUT touching the installed package. + #[tokio::test] + async fn service_success_extracts_dist_and_rewrites_lock() { + let lock = lock_value("psr/log", "3.0.2", false); + let (dir, blobs, _installed, record) = fixture(&lock).await; + let root = dir.path(); + let zip = make_dist_zip( + "php-fig-log-f16e1d5", + &[ + ("src/LoggerInterface.php", PATCHED), + ("composer.json", b"{\"name\": \"psr/log\"}\n"), + ], + ); + let sri = sri_sha512(&zip); + let server = wiremock::MockServer::start().await; + mount_composer_granted(&server, &sri, &zip).await; + + let bogus_installed = root.join("no-such-install"); + let (result, entry, warnings) = unwrap_done( + vendor_with_service( + root, + &blobs, + &bogus_installed, + &record, + &composer_service_cfg(&server.uri(), VendorSource::Service, false), + ) + .await, + ); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + let copy = root.join(copy_rel()); + assert_eq!( + tokio::fs::read(copy.join("src/LoggerInterface.php")).await.unwrap(), + PATCHED + ); + let lock_text = tokio::fs::read_to_string(root.join(COMPOSER_LOCK)).await.unwrap(); + assert!(lock_text.contains(©_rel()), "lock rewired to the copy: {lock_text}"); + assert!(warnings.iter().any(|w| w.code == "vendor_prebuilt_downloaded")); + } + + /// `service` mode + integrity mismatch hard-fails, nothing extracted. 
+ #[tokio::test] + async fn service_integrity_mismatch_service_mode_hard_fails() { + let lock = lock_value("psr/log", "3.0.2", false); + let (dir, blobs, installed, record) = fixture(&lock).await; + let root = dir.path(); + let zip = make_dist_zip("x", &[("src/LoggerInterface.php", PATCHED)]); + let wrong = sri_sha512(b"different bytes"); + let server = wiremock::MockServer::start().await; + mount_composer_granted(&server, &wrong, &zip).await; + + let (code, _) = unwrap_refused( + vendor_with_service( + root, + &blobs, + &installed, + &record, + &composer_service_cfg(&server.uri(), VendorSource::Service, false), + ) + .await, + ); + assert_eq!(code, "vendor_prebuilt_required"); + assert!(!root.join(format!(".socket/vendor/composer/{UUID}")).exists()); + } + + /// `auto` + a not-built service status falls back to the local build. + #[tokio::test] + async fn service_unavailable_auto_falls_back_to_build() { + let lock = lock_value("psr/log", "3.0.2", false); + let (dir, blobs, installed, record) = fixture(&lock).await; + let root = dir.path(); + let server = wiremock::MockServer::start().await; + mount_composer_status(&server, "not_found").await; + + let (result, entry, _) = unwrap_done( + vendor_with_service( + root, + &blobs, + &installed, + &record, + &composer_service_cfg(&server.uri(), VendorSource::Auto, false), + ) + .await, + ); + assert!(result.success, "auto must fall back to the local build: {:?}", result.error); + assert!(entry.is_some()); + assert_eq!( + tokio::fs::read(root.join(copy_rel()).join("src/LoggerInterface.php")).await.unwrap(), + PATCHED + ); + } + + /// `--offline` + `--vendor-source=service` refuses without any network. 
+ #[tokio::test] + async fn offline_service_mode_refuses() { + let lock = lock_value("psr/log", "3.0.2", false); + let (dir, blobs, installed, record) = fixture(&lock).await; + let root = dir.path(); + let (code, _) = unwrap_refused( + vendor_with_service( + root, + &blobs, + &installed, + &record, + &composer_service_cfg("http://127.0.0.1:1", VendorSource::Service, true), + ) + .await, + ); + assert_eq!(code, "vendor_service_offline_conflict"); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/gem.rs b/crates/socket-patch-core/src/patch/vendor/gem.rs index 8a4ad73..7a11550 100644 --- a/crates/socket-patch-core/src/patch/vendor/gem.rs +++ b/crates/socket-patch-core/src/patch/vendor/gem.rs @@ -63,10 +63,14 @@ use crate::utils::fs::atomic_write_bytes; use crate::utils::purl::{build_gem_purl, parse_gem_purl}; use super::path::vendor_uuid_dir_rel; +use super::registry_fetch::extract_gem_data; +use super::service_fetch::{ + fetch_verified_archive, fetch_verified_secondary, SecondaryArtifactResult, ServiceArtifact, +}; use super::state::{ write_marker, VendorArtifact, VendorEntry, VendorMarker, WiringAction, WiringRecord, }; -use super::{RevertOutcome, VendorOutcome, VendorWarning}; +use super::{RevertOutcome, VendorOutcome, VendorServiceConfig, VendorWarning}; const GEMFILE: &str = "Gemfile"; const GEMFILE_LOCK: &str = "Gemfile.lock"; @@ -103,11 +107,18 @@ const MARKER_SCHEMA_VERSION: u32 = 1; /// /// `installed_dir` is the crawler's gem dir (`/gems/-`, /// the same root `apply` patches — manifest file keys resolve relative to it); -/// the stub gemspec is derived from it +/// the LOCAL build's stub gemspec is derived from it /// (`/specifications/-.gemspec` — `specifications/` /// is a sibling of `gems/`). 
/// -/// Edit order: copy+patch → Gemfile → Gemfile.lock; a lock-edit failure +/// `service` (when configured) lets the materialise step download the prebuilt +/// patched `.gem` + the converter's `gem-stub-gemspec` second artifact from +/// patch.socket.dev instead of copying + patching locally — no local install +/// or stub needed (`auto` falls back to the local build on a miss, `service` +/// fails closed). The wiring (Gemfile + Gemfile.lock pair edit) is identical +/// either way; only how `copy_dir` + its `.gemspec` are produced differs. +/// +/// Edit order: materialise → Gemfile → Gemfile.lock; a lock-edit failure /// unwinds the Gemfile to its recorded original bytes, so the pair is never /// left half-wired. #[allow(clippy::too_many_arguments)] @@ -120,6 +131,7 @@ pub async fn vendor_gem( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&VendorServiceConfig>, ) -> VendorOutcome { // ── coordinates ────────────────────────────────────────────────────── let Some((name, version)) = parse_gem_purl(purl) else { @@ -204,38 +216,37 @@ pub async fn vendor_gem( } }; - // ── stub gemspec ───────────────────────────────────────────────────── + // ── stub gemspec (local) ───────────────────────────────────────────── // `specifications/` is a sibling of `gems/`; derive it from installed_dir. + // The read is non-fatal: the LOCAL build needs this stub, but the service + // path brings its own (the converter-generated `gem-stub-gemspec`), so an + // auto-fetched (not-installed) gem whose only `installed_dir` is a bare + // `data.tar.gz` extraction can still vendor via the service. The + // `gem_spec_missing` refusal moves into the local-build fallback, where the + // stub is actually required. 
let spec_src = installed_dir .parent() .and_then(Path::parent) .map(|home| home.join("specifications").join(format!("{leaf}.gemspec"))); - let spec_text = match &spec_src { + let spec_text: Option = match &spec_src { Some(p) => tokio::fs::read_to_string(p).await.ok(), None => None, }; - let Some(spec_text) = spec_text else { - return refused( - "gem_spec_missing", - format!( - "no stub gemspec at {} (a path source cannot be wired without one)", - spec_src - .as_deref() - .map(|p| p.display().to_string()) - .unwrap_or_else(|| "/specifications".to_string()) - ), - ); - }; // Textual heuristic, deliberately fail-closed on a match: bundler skips // extension builds for path sources entirely, so a native gem would // install fine and then fail at `require` time with a missing `.so`. - if gemspec_declares_extensions(&spec_text) { - return refused( - "native_extensions_unsupported", - format!( - "{leaf}.gemspec declares native extensions; bundler does not build extensions for path-sourced gems" - ), - ); + // Only the local stub is checked here (when present); the service stub is + // re-checked in `gem_service_copy`, and a native gem emits no service stub + // at all (the converter refuses it), so the service path also misses. + if let Some(text) = &spec_text { + if gemspec_declares_extensions(text) { + return refused( + "native_extensions_unsupported", + format!( + "{leaf}.gemspec declares native extensions; bundler does not build extensions for path-sourced gems" + ), + ); + } } // ── idempotent hot path ────────────────────────────────────────────── @@ -268,54 +279,39 @@ pub async fn vendor_gem( // missing/stale: rebuild the ARTIFACT only — the pair edit is // already correct and the full path would re-record the live // vendored fragments as `original`, breaking a later --revert. + // Service-preferred like the full path (an auto-fetched gem has no + // local stub to rebuild from — only the service can). 
if !dry_run { - if let Err(e) = fresh_copy(installed_dir, ©_dir, None).await { - return VendorOutcome::Done { - result: synthesized_result( - purl, - ©_dir, - Vec::new(), - false, - Some(format!("failed to copy installed gem: {e}")), - ), - entry: None, - warnings: Vec::new(), - }; - } - if let Err(e) = - tokio::fs::write(copy_dir.join(format!("{name}.gemspec")), &spec_text).await - { - let _ = remove_tree(&uuid_dir).await; - return VendorOutcome::Done { - result: synthesized_result( - purl, - ©_dir, - Vec::new(), - false, - Some(format!( - "failed to copy the stub gemspec into the vendored dir: {e}" - )), - ), - entry: None, - warnings: Vec::new(), - }; + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return refused( + "vendor_service_offline_conflict", + "--vendor-source=service needs the network but --offline is set" + .to_string(), + ); + } } let mut warnings: Vec = Vec::new(); - let mut result = super::force_apply_staged( + let result = match materialise_patched_copy( purl, + installed_dir, ©_dir, + &uuid_dir, + name, + version, + spec_text.as_deref(), record, sources, - false, force, - name, - version, + service, &mut warnings, ) - .await; - result.package_path = copy_dir.display().to_string(); + .await + { + Ok(result) => result, + Err(outcome) => return *outcome, + }; if !result.success { - let _ = remove_tree(&uuid_dir).await; return VendorOutcome::Done { result, entry: None, @@ -384,61 +380,48 @@ pub async fn vendor_gem( Err(detail) => return refused("gemfile_declaration_not_editable", detail), }; - // ── copy + patch ───────────────────────────────────────────────────── - if let Err(e) = fresh_copy(installed_dir, ©_dir, None).await { - return VendorOutcome::Done { - result: synthesized_result( - purl, - ©_dir, - Vec::new(), - false, - Some(format!("failed to copy installed gem: {e}")), - ), - entry: None, - warnings: Vec::new(), - }; - } - // The vendored dir is freshly created and not yet referenced by anything, - // 
so a plain write suffices for the gemspec. - if let Err(e) = tokio::fs::write(copy_dir.join(format!("{name}.gemspec")), &spec_text).await { - let _ = remove_tree(&uuid_dir).await; - return VendorOutcome::Done { - result: synthesized_result( - purl, - ©_dir, - Vec::new(), - false, - Some(format!( - "failed to copy the stub gemspec into the vendored dir: {e}" - )), - ), - entry: None, - warnings: Vec::new(), - }; - } + // ── materialise the patched copy ────────────────────────────────────── + // Prefer the prebuilt `.gem` + stub gemspec from the patch service + // (download + extract; no local install or patch-apply needed); else copy + // the installed gem, drop in the local stub gemspec, and apply the patch. let mut warnings: Vec = Vec::new(); - let mut result = super::force_apply_staged( + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return refused( + "vendor_service_offline_conflict", + "--vendor-source=service needs the network but --offline is set".to_string(), + ); + } + } + let mut result = match materialise_patched_copy( purl, + installed_dir, ©_dir, + &uuid_dir, + name, + version, + spec_text.as_deref(), record, sources, - false, force, - name, - version, + service, &mut warnings, ) - .await; - result.package_path = copy_dir.display().to_string(); + .await + { + Ok(result) => result, + Err(outcome) => return *outcome, + }; if !result.success { - // Don't leave a half-built copy; neither file was touched. - let _ = remove_tree(&uuid_dir).await; + // The copy / stub / patch step left the result un-successful (and + // cleaned up its own partial copy); neither project file was touched. 
return VendorOutcome::Done { result, entry: None, warnings, }; } + result.package_path = copy_dir.display().to_string(); // ── Gemfile edit ───────────────────────────────────────────────────── let new_gemfile = apply_gemfile_plan(&gemfile_text, &plan); @@ -591,6 +574,263 @@ pub async fn vendor_gem( } } +// ── materialisation (service download / local build) ────────────────────────── + +/// The path-source stub gemspec served as the gem's SECOND artifact, alongside +/// the `.gem` (mirrors npm's `yarn-berry-zip`). The converter generates it +/// because a `.gem` only carries the gemspec as YAML in `metadata.gz`, not the +/// eval-able Ruby form a bundler path source loads. +const GEM_STUB_ARTIFACT_KIND: &str = "gem-stub-gemspec"; + +/// Outcome of attempting to materialise the gem copy from the patch service. +enum GemServiceCopy { + /// The prebuilt `.gem` was extracted into `copy_dir` and the verified stub + /// gemspec written as `.gemspec`. + Used, + /// Bubble this terminal outcome (boxed — `VendorOutcome` is large). + HardFail(Box), + /// Fall back to copying the installed gem + local stub and patching it. + FallBack, +} + +/// Download the prebuilt `.gem` + its `gem-stub-gemspec` secondary artifact, +/// integrity-verify both, extract the `.gem`'s `data.tar.gz` into `copy_dir`, +/// and write the stub as `.gemspec`. The extracted `.gem` IS the patched +/// package the converter built, so it needs no local install — the point of +/// the service path. Maps each service outcome onto the `auto` / `service` +/// fallback policy. +/// +/// A MISSING stub artifact is a terminal miss (fall back under `auto`, refuse +/// under `service`): it means either a native-extension gem (the converter +/// emits no stub — bundler can't build extensions for a path source) or a gem +/// patch built before the stub rollout (the invalidation migration rebuilds +/// those). The downloaded stub is re-checked for native extensions as defense +/// in depth. 
+async fn gem_service_copy( + service: Option<&VendorServiceConfig>, + record: &PatchRecord, + name: &str, + copy_dir: &Path, + uuid_dir: &Path, + warnings: &mut Vec, +) -> GemServiceCopy { + let Some(cfg) = service else { + return GemServiceCopy::FallBack; + }; + if !cfg.service_enabled() { + return GemServiceCopy::FallBack; + } + fn hard(code: &'static str, detail: String) -> GemServiceCopy { + GemServiceCopy::HardFail(Box::new(VendorOutcome::Refused { code, detail })) + } + let miss = |warnings: &mut Vec, code: &'static str, reason: String| { + if cfg.source.requires_service() { + hard("vendor_prebuilt_required", reason) + } else { + warnings.push(VendorWarning::new( + code, + format!("{reason}; building locally instead"), + )); + GemServiceCopy::FallBack + } + }; + + // Step 1: the prebuilt `.gem` (sha512-verified against the reference). + let archive = match fetch_verified_archive(cfg, &record.uuid, name).await { + ServiceArtifact::Ready(archive) => archive, + ServiceArtifact::IntegrityMismatch(reason) => { + return miss( + warnings, + "vendor_prebuilt_integrity_mismatch", + format!("prebuilt .gem failed integrity ({reason})"), + ); + } + ServiceArtifact::Pending => { + return miss( + warnings, + "vendor_prebuilt_pending", + "prebuilt .gem is still building".to_string(), + ); + } + ServiceArtifact::Unavailable(reason) => { + if cfg.source.requires_service() { + return hard( + "vendor_prebuilt_required", + format!("prebuilt .gem unavailable: {reason}"), + ); + } + return GemServiceCopy::FallBack; + } + ServiceArtifact::Failed(reason) => { + return miss( + warnings, + "vendor_prebuilt_unavailable", + format!("patch service request failed ({reason})"), + ); + } + }; + + // Step 2: the stub gemspec the converter generated alongside the `.gem`. 
+ let stub = match fetch_verified_secondary(cfg, &archive, GEM_STUB_ARTIFACT_KIND, name).await { + SecondaryArtifactResult::Ready(bytes) => bytes, + SecondaryArtifactResult::Absent => { + return miss( + warnings, + "vendor_prebuilt_stub_missing", + "the patch service served no stub gemspec for this gem (a native-extension \ + gem, or a patch built before the stub rollout)" + .to_string(), + ); + } + SecondaryArtifactResult::IntegrityMismatch(reason) => { + return miss( + warnings, + "vendor_prebuilt_integrity_mismatch", + format!("prebuilt stub gemspec failed integrity ({reason})"), + ); + } + SecondaryArtifactResult::Failed(reason) => { + return miss( + warnings, + "vendor_prebuilt_unavailable", + format!("could not fetch the stub gemspec ({reason})"), + ); + } + }; + + // Defense in depth: the converter does not emit a stub for native gems, but + // refuse one here too — bundler silently skips extension builds for path + // sources, so a native gem would install and then fail at `require` time. + if gemspec_declares_extensions(&String::from_utf8_lossy(&stub)) { + return hard( + "native_extensions_unsupported", + format!( + "the served stub gemspec for {name} declares native extensions; bundler does \ + not build extensions for path-sourced gems" + ), + ); + } + + // Extract the patched `.gem`'s data.tar.gz into a clean copy dir, then add + // the stub as `.gemspec` (a `.gem`'s data.tar.gz never carries one — + // the gemspec lives in metadata.gz). 
+ let _ = remove_tree(copy_dir).await; + if let Err(e) = tokio::fs::create_dir_all(copy_dir).await { + return hard( + "vendor_prebuilt_write_failed", + format!("cannot create {}: {e}", copy_dir.display()), + ); + } + if let Err(e) = extract_gem_data(&archive.bytes, copy_dir) { + let _ = remove_tree(uuid_dir).await; + return hard( + "vendor_prebuilt_extract_failed", + format!("cannot extract the prebuilt .gem: {e}"), + ); + } + if let Err(e) = tokio::fs::write(copy_dir.join(format!("{name}.gemspec")), &stub).await { + let _ = remove_tree(uuid_dir).await; + return hard( + "vendor_prebuilt_write_failed", + format!("cannot write the stub gemspec into the vendored dir: {e}"), + ); + } + warnings.push(VendorWarning::new( + "vendor_prebuilt_downloaded", + format!( + "vendored {name} from the patch service ({})", + archive.source_url + ), + )); + GemServiceCopy::Used +} + +/// Materialise the patched copy at `copy_dir` plus its `.gemspec` stub, +/// service-download first (see [`gem_service_copy`]) and local copy+stub+apply +/// as the fallback. Returns the verify [`ApplyResult`] (a synthesized +/// `AlreadyPatched` on the service path), or a terminal [`VendorOutcome`] to +/// bubble. A non-fatal copy/stub/patch failure is surfaced as an UN-successful +/// `ApplyResult` (the caller returns it as a `Done` with no ledger entry); this +/// helper cleans up its own partial copy in that case. +#[allow(clippy::too_many_arguments)] +async fn materialise_patched_copy( + purl: &str, + installed_dir: &Path, + copy_dir: &Path, + uuid_dir: &Path, + name: &str, + version: &str, + spec_text: Option<&str>, + record: &PatchRecord, + sources: &PatchSources<'_>, + force: bool, + service: Option<&VendorServiceConfig>, + warnings: &mut Vec, +) -> Result> { + match gem_service_copy(service, record, name, copy_dir, uuid_dir, warnings).await { + GemServiceCopy::Used => { + // The service `.gem` is the patched package; trust its verified + // integrity (every file reads as AlreadyPatched). 
+ let verified = record + .files + .keys() + .map(|f| already_patched_verify(f)) + .collect(); + Ok(synthesized_result(purl, copy_dir, verified, true, None)) + } + GemServiceCopy::HardFail(outcome) => Err(outcome), + GemServiceCopy::FallBack => { + // The local build needs the stub gemspec from the installed gem's + // `specifications/` dir — absent for an auto-fetched (not-installed) + // gem, whose only route is the service path. + let Some(spec_text) = spec_text else { + return Err(Box::new(refused( + "gem_spec_missing", + format!( + "no local stub gemspec for {name}@{version} (a path source cannot be \ + wired without one); install the gem or use --vendor-source=service" + ), + ))); + }; + if let Err(e) = fresh_copy(installed_dir, copy_dir, None).await { + return Ok(synthesized_result( + purl, + copy_dir, + Vec::new(), + false, + Some(format!("failed to copy installed gem: {e}")), + )); + } + // The vendored dir is freshly created and not yet referenced by + // anything, so a plain write suffices for the gemspec. + if let Err(e) = + tokio::fs::write(copy_dir.join(format!("{name}.gemspec")), spec_text).await + { + let _ = remove_tree(uuid_dir).await; + return Ok(synthesized_result( + purl, + copy_dir, + Vec::new(), + false, + Some(format!( + "failed to copy the stub gemspec into the vendored dir: {e}" + )), + )); + } + let mut result = super::force_apply_staged( + purl, copy_dir, record, sources, false, force, name, version, warnings, + ) + .await; + result.package_path = copy_dir.display().to_string(); + if !result.success { + // Don't leave a half-built copy; neither project file was touched. 
+ let _ = remove_tree(uuid_dir).await; + } + Ok(result) + } + } +} + /// Revert a gem vendor entry: restore the Gemfile line / delete the managed /// block, splice the lock's spec block back into GEM specs (sorted), the /// original DEPENDENCIES entry back in and the registry CHECKSUMS line back @@ -1503,6 +1743,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -2185,6 +2426,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -2601,4 +2843,429 @@ mod tests { v1 ); } + + // ─────────────── service-download path (Tier B: gem) ────────────────── + // + // gem vendors a patched source DIRECTORY plus a stub gemspec, so the + // service path downloads the prebuilt `.gem` AND the `gem-stub-gemspec` + // second artifact, verifies both, extracts the `.gem`'s data.tar.gz into the + // copy dir, and writes the stub as `.gemspec`. Both the service path + // and the local-build fallback are exercised. + + use crate::api::client::{ApiClient, ApiClientOptions}; + use crate::patch::vendor::VendorSource; + + /// A valid path-source stub (no native extensions). + const SERVICE_STUB: &[u8] = b"# -*- encoding: utf-8 -*-\n# stub: rack 3.2.6 ruby lib\n\nGem::Specification.new do |s|\n s.name = \"rack\".freeze\n s.version = \"3.2.6\".freeze\n s.require_paths = [\"lib\".freeze]\nend\n"; + /// A stub that declares native extensions (must be refused). 
+ const SERVICE_STUB_NATIVE: &[u8] = b"Gem::Specification.new do |s|\n s.name = \"rack\".freeze\n s.version = \"3.2.6\".freeze\n s.extensions = [\"ext/rack/extconf.rb\"]\nend\n"; + + fn sri_sha512(bytes: &[u8]) -> String { + use base64::Engine as _; + use sha2::{Digest as _, Sha512}; + format!( + "sha512-{}", + base64::engine::general_purpose::STANDARD.encode(Sha512::digest(bytes)) + ) + } + + fn gem_service_cfg(uri: &str, source: VendorSource, offline: bool) -> VendorServiceConfig { + VendorServiceConfig { + source, + client: Some(ApiClient::new(ApiClientOptions { + api_url: uri.to_string(), + api_token: Some("sktsec_placeholder_value_for_tests_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + })), + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline, + } + } + + /// Build a `.gem` (uncompressed outer tar holding `data.tar.gz` + + /// `metadata.gz`). `data_files` are the inner data.tar.gz entries at the + /// root (no prefix dir), as a real `.gem` carries them. + fn make_gem(data_files: &[(&str, &[u8])]) -> Vec { + use std::io::Write as _; + let mut data_tar = tar::Builder::new(Vec::new()); + for (rel, content) in data_files { + let mut h = tar::Header::new_gnu(); + h.set_size(content.len() as u64); + h.set_mode(0o644); + h.set_cksum(); + data_tar.append_data(&mut h, rel, *content).unwrap(); + } + let data_tar = data_tar.into_inner().unwrap(); + let mut enc = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default()); + enc.write_all(&data_tar).unwrap(); + let data_gz = enc.finish().unwrap(); + // A token metadata.gz: the CLI service path never reads it (it uses the + // served stub), but a real `.gem` always carries one. 
+ let mut menc = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default()); + menc.write_all(b"--- !ruby/object:Gem::Specification\nname: rack\n") + .unwrap(); + let metadata_gz = menc.finish().unwrap(); + let mut outer = tar::Builder::new(Vec::new()); + for (name, bytes) in [ + ("metadata.gz", metadata_gz.as_slice()), + ("data.tar.gz", data_gz.as_slice()), + ] { + let mut h = tar::Header::new_gnu(); + h.set_size(bytes.len() as u64); + h.set_mode(0o644); + h.set_cksum(); + outer.append_data(&mut h, name, bytes).unwrap(); + } + outer.into_inner().unwrap() + } + + /// Mount the two-step granted flow: POST returns the `.gem` (tarball) and, + /// when `stub` is `Some`, the `gem-stub-gemspec` second artifact; GET serves + /// each artifact's bytes. `gem_sha512` / the stub's advertised sha512 are + /// passed explicitly so a test can advertise a WRONG hash. + async fn mount_gem_granted( + server: &wiremock::MockServer, + gem_bytes: &[u8], + gem_sha512: &str, + stub: Option<(&[u8], &str)>, + ) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + let gem_path = format!("/patch/gem/rack/3.2.6/tok/{UUID}/rack-3.2.6.gem"); + let gem_url = format!("{}{gem_path}", server.uri()); + let mut artifacts = vec![serde_json::json!({ + "kind": "tarball", "url": gem_url, + "integrity": { "sha512": gem_sha512 } + })]; + let stub_path = format!("/patch/gem/rack/3.2.6/tok/{UUID}/rack-3.2.6.gemspec"); + if let Some((stub_bytes, stub_sha512)) = stub { + let stub_url = format!("{}{stub_path}", server.uri()); + artifacts.push(serde_json::json!({ + "kind": "gem-stub-gemspec", "url": stub_url, + "integrity": { "sha512": stub_sha512 } + })); + Mock::given(method("GET")) + .and(path(stub_path.clone())) + .respond_with(ResponseTemplate::new(200).set_body_bytes(stub_bytes.to_vec())) + .mount(server) + .await; + } + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + 
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { + "status": "granted", + "url": gem_url, + "purl": PURL, + "artifacts": artifacts + }} + }))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path(gem_path)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(gem_bytes.to_vec())) + .mount(server) + .await; + } + + async fn mount_gem_status(server: &wiremock::MockServer, status: &str) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { "status": status, "url": null, "artifacts": [] } } + }))) + .mount(server) + .await; + } + + /// An `installed_dir` that does NOT exist on disk but is named `` (so + /// the platform-gem check passes): the service path must need no local copy. + fn missing_install(root: &Path) -> PathBuf { + root.join("no-such-install/rack-3.2.6") + } + + fn copy_lib(root: &Path) -> PathBuf { + root.join(format!(".socket/vendor/gem/{UUID}/rack-3.2.6/lib/rack.rb")) + } + + fn copy_gemspec(root: &Path) -> PathBuf { + root.join(format!(".socket/vendor/gem/{UUID}/rack-3.2.6/rack.gemspec")) + } + + /// Service success: the prebuilt `.gem` is extracted into the copy dir, the + /// served stub is written as `rack.gemspec`, the Gemfile + lock are wired, + /// and a `vendor_prebuilt_downloaded` advisory is emitted — WITHOUT a local + /// install (a deliberately-missing `installed_dir`). 
+ #[tokio::test] + async fn service_success_extracts_gem_and_wires_lock() { + let (_tmp, root, _installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let gem = make_gem(&[("lib/rack.rb", PATCHED)]); + let sri = sri_sha512(&gem); + let stub_sri = sri_sha512(SERVICE_STUB); + let server = wiremock::MockServer::start().await; + mount_gem_granted(&server, &gem, &sri, Some((SERVICE_STUB, &stub_sri))).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &missing_install(&root), + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg( + &server.uri(), + VendorSource::Service, + false, + )), + ) + .await; + let (result, entry, warnings) = unwrap_done(outcome); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + assert_eq!(tokio::fs::read(copy_lib(&root)).await.unwrap(), PATCHED); + assert_eq!( + tokio::fs::read(copy_gemspec(&root)).await.unwrap(), + SERVICE_STUB + ); + assert_eq!( + tokio::fs::read_to_string(root.join(GEMFILE_LOCK)) + .await + .unwrap(), + expected_lock_direct() + ); + assert!(warnings + .iter() + .any(|w| w.code == "vendor_prebuilt_downloaded")); + } + + /// `service` mode + a `.gem` integrity mismatch hard-fails; nothing wired. 
+ #[tokio::test] + async fn service_gem_integrity_mismatch_hard_fails() { + let (_tmp, root, installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let gem = make_gem(&[("lib/rack.rb", PATCHED)]); + let wrong = sri_sha512(b"different bytes"); + let stub_sri = sri_sha512(SERVICE_STUB); + let server = wiremock::MockServer::start().await; + mount_gem_granted(&server, &gem, &wrong, Some((SERVICE_STUB, &stub_sri))).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &installed, + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg( + &server.uri(), + VendorSource::Service, + false, + )), + ) + .await; + let (code, _) = unwrap_refused(outcome); + assert_eq!(code, "vendor_prebuilt_required"); + assert!(!root.join(format!(".socket/vendor/gem/{UUID}")).exists()); + // The lock is untouched. + assert_eq!( + tokio::fs::read_to_string(root.join(GEMFILE_LOCK)) + .await + .unwrap(), + LOCK_DIRECT + ); + } + + /// `service` mode + a stub integrity mismatch hard-fails. 
+ #[tokio::test] + async fn service_stub_integrity_mismatch_hard_fails() { + let (_tmp, root, installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let gem = make_gem(&[("lib/rack.rb", PATCHED)]); + let sri = sri_sha512(&gem); + let wrong_stub = sri_sha512(b"not the stub"); + let server = wiremock::MockServer::start().await; + mount_gem_granted(&server, &gem, &sri, Some((SERVICE_STUB, &wrong_stub))).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &installed, + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg( + &server.uri(), + VendorSource::Service, + false, + )), + ) + .await; + let (code, _) = unwrap_refused(outcome); + assert_eq!(code, "vendor_prebuilt_required"); + assert!(!root.join(format!(".socket/vendor/gem/{UUID}")).exists()); + } + + /// `service` mode + a missing stub artifact hard-fails (old un-rebuilt row / + /// native gem): the `.gem` is present but no `gem-stub-gemspec` is served. + #[tokio::test] + async fn service_stub_missing_service_mode_hard_fails() { + let (_tmp, root, installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let gem = make_gem(&[("lib/rack.rb", PATCHED)]); + let sri = sri_sha512(&gem); + let server = wiremock::MockServer::start().await; + mount_gem_granted(&server, &gem, &sri, None).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &installed, + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg( + &server.uri(), + VendorSource::Service, + false, + )), + ) + .await; + let (code, _) = unwrap_refused(outcome); + assert_eq!(code, "vendor_prebuilt_required"); + assert!(!root.join(format!(".socket/vendor/gem/{UUID}")).exists()); + } + + /// `auto` + a missing stub artifact falls back to the LOCAL build (which + /// copies the installed gem + local stub and patches it). 
+ #[tokio::test] + async fn service_stub_missing_auto_falls_back_to_build() { + let (_tmp, root, installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let gem = make_gem(&[("lib/rack.rb", PATCHED)]); + let sri = sri_sha512(&gem); + let server = wiremock::MockServer::start().await; + mount_gem_granted(&server, &gem, &sri, None).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &installed, + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg(&server.uri(), VendorSource::Auto, false)), + ) + .await; + let (result, entry, _) = unwrap_done(outcome); + assert!(result.success, "auto must fall back: {:?}", result.error); + assert!(entry.is_some()); + // The locally-built copy carries the patched content + the LOCAL stub. + assert_eq!(tokio::fs::read(copy_lib(&root)).await.unwrap(), PATCHED); + assert_eq!( + tokio::fs::read_to_string(copy_gemspec(&root)) + .await + .unwrap(), + GEMSPEC + ); + } + + /// `auto` + a not-built service status falls back to the local build. + #[tokio::test] + async fn service_unavailable_auto_falls_back_to_build() { + let (_tmp, root, installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let server = wiremock::MockServer::start().await; + mount_gem_status(&server, "not_found").await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &installed, + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg(&server.uri(), VendorSource::Auto, false)), + ) + .await; + let (result, entry, _) = unwrap_done(outcome); + assert!(result.success, "auto must fall back: {:?}", result.error); + assert!(entry.is_some()); + assert_eq!(tokio::fs::read(copy_lib(&root)).await.unwrap(), PATCHED); + } + + /// A served stub that declares native extensions is refused (defense in + /// depth — the converter should never emit one). 
+ #[tokio::test] + async fn service_native_ext_stub_hard_fails() { + let (_tmp, root, _installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let gem = make_gem(&[("lib/rack.rb", PATCHED)]); + let sri = sri_sha512(&gem); + let stub_sri = sri_sha512(SERVICE_STUB_NATIVE); + let server = wiremock::MockServer::start().await; + mount_gem_granted(&server, &gem, &sri, Some((SERVICE_STUB_NATIVE, &stub_sri))).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_gem( + PURL, + &missing_install(&root), + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg( + &server.uri(), + VendorSource::Service, + false, + )), + ) + .await; + let (code, _) = unwrap_refused(outcome); + assert_eq!(code, "native_extensions_unsupported"); + } + + /// `--offline` + `--vendor-source=service` refuses without any network. + #[tokio::test] + async fn offline_service_mode_refuses() { + let (_tmp, root, installed, blobs, record) = fixture(GEMFILE_DIRECT, LOCK_DIRECT).await; + let sources = PatchSources::blobs_only(&blobs); + let outcome = vendor_gem( + PURL, + &installed, + &root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&gem_service_cfg( + "http://127.0.0.1:1", + VendorSource::Service, + true, + )), + ) + .await; + let (code, _) = unwrap_refused(outcome); + assert_eq!(code, "vendor_service_offline_conflict"); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/golang.rs b/crates/socket-patch-core/src/patch/vendor/golang.rs index 8134608..e2f7f52 100644 --- a/crates/socket-patch-core/src/patch/vendor/golang.rs +++ b/crates/socket-patch-core/src/patch/vendor/golang.rs @@ -19,19 +19,53 @@ use std::path::Path; use crate::manifest::schema::PatchRecord; -use crate::patch::apply::PatchSources; +use crate::patch::apply::{ApplyResult, PatchSources, VerifyResult, VerifyStatus}; use crate::patch::copy_tree::remove_tree; use crate::patch::go_mod_edit::{ self, 
read_replace_entries, replace_target_path, ReplaceOwner, GO_PATCHES_DIR, }; -use crate::patch::go_redirect::{apply_go_redirect, are_safe_redirect_coords}; +use crate::patch::go_redirect::{apply_go_redirect, are_safe_redirect_coords, ensure_module_go_mod}; use crate::utils::purl::{parse_golang_purl, strip_purl_qualifiers}; use super::path::vendor_uuid_dir_rel; +use super::registry_fetch::extract_zip_with_prefix; +use super::service_fetch::{fetch_verified_archive, ServiceArtifact}; use super::state::{ write_marker, VendorArtifact, VendorEntry, VendorMarker, WiringAction, WiringRecord, }; -use super::{RevertOutcome, VendorOutcome, VendorWarning}; +use super::{RevertOutcome, VendorOutcome, VendorServiceConfig, VendorWarning}; + +fn already_patched_verify(file: &str) -> VerifyResult { + VerifyResult { + file: file.to_string(), + status: VerifyStatus::AlreadyPatched, + message: None, + current_hash: None, + expected_hash: None, + target_hash: None, + } +} + +/// A synthesized success [`ApplyResult`] for a service-downloaded module: there +/// is no local apply to verify (the downloaded zip IS the patched module), so +/// every patched file reads as `AlreadyPatched` — trust is the verified service +/// integrity (sha512 + the `h1:` module dirhash). +fn synthesized_success( + purl: &str, + copy_dir: &Path, + files_verified: Vec, +) -> ApplyResult { + ApplyResult { + package_key: purl.to_string(), + package_path: copy_dir.display().to_string(), + success: true, + files_verified, + files_patched: Vec::new(), + applied_via: std::collections::HashMap::new(), + error: None, + sidecar: None, + } +} /// The committed copy exists and every patched file matches its afterHash. 
async fn copy_hashes_ok( @@ -72,6 +106,7 @@ pub async fn vendor_go_module( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&VendorServiceConfig>, ) -> VendorOutcome { // ── coordinate validation (fail-closed, before any disk access) ────── let Some((module, version)) = parse_golang_purl(purl) else { @@ -140,39 +175,79 @@ pub async fn vendor_go_module( // content. The engine is shared with the in-place `apply` redirect // path, whose strict semantics stay unchanged. let mut warnings: Vec = Vec::new(); - if !force { - let missing = super::missing_existing_patch_files(pristine_src, &record.files).await; - if let Some(first) = missing.first() { - return VendorOutcome::Done { - result: super::failed_apply_result( - purl, - format!("Cannot apply patch: {first} - File not found"), - ), - entry: None, - warnings, + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return VendorOutcome::Refused { + code: "vendor_service_offline_conflict", + detail: "--vendor-source=service needs the network but --offline is set" + .to_string(), }; } } - // The engine does the heavy lifting: fresh copy → hardened apply pipeline - // → `replace` upsert (which refuses a user-authored same-version pin). - let result = apply_go_redirect( - purl, + // Acquire the patched module: prefer the prebuilt module zip from the patch + // service (download → verify → extract → wire the `replace`, no pristine + // source needed); else let the engine copy the pristine source, patch it, + // and wire the `replace`. 
+ let result = match go_service_redirect( + service, + record, module, version, - pristine_src, - project_root, &base_rel, - &record.files, - sources, - Some(&record.uuid), - dry_run, - crate::patch::apply::MismatchPolicy::Force, + ©_dir, + project_root, + &mut warnings, ) - .await; - if result.success { - warnings.extend(super::mismatch_overwrite_warnings(&result, module, version)); - } + .await + { + GoServiceRedirect::Used => { + let verified = record.files.keys().map(|f| already_patched_verify(f)).collect(); + synthesized_success(purl, ©_dir, verified) + } + GoServiceRedirect::HardFail(outcome) => return *outcome, + GoServiceRedirect::FallBack => { + // Vendor auto-force policy (the engine's copy is staged from the + // pristine source, never the user's tree): missing patch targets + // still fail closed unless the caller's own `--force` asked for the + // skip tolerance. + if !force { + let missing = + super::missing_existing_patch_files(pristine_src, &record.files).await; + if let Some(first) = missing.first() { + return VendorOutcome::Done { + result: super::failed_apply_result( + purl, + format!("Cannot apply patch: {first} - File not found"), + ), + entry: None, + warnings, + }; + } + } + // The engine does the heavy lifting: fresh copy → hardened apply + // pipeline → `replace` upsert (refuses a user-authored same-version + // pin). + let result = apply_go_redirect( + purl, + module, + version, + pristine_src, + project_root, + &base_rel, + &record.files, + sources, + Some(&record.uuid), + dry_run, + crate::patch::apply::MismatchPolicy::Force, + ) + .await; + if result.success { + warnings.extend(super::mismatch_overwrite_warnings(&result, module, version)); + } + result + } + }; if dry_run { return VendorOutcome::Done { @@ -341,6 +416,123 @@ pub async fn vendor_go_module( } } +/// Outcome of attempting to materialise the go copy from the patch service. +enum GoServiceRedirect { + /// The prebuilt module zip was extracted and the `replace` wired. 
+ Used, + /// Bubble this terminal outcome (boxed — `VendorOutcome` is large). + HardFail(Box), + /// Fall back to copying + patching the pristine module source. + FallBack, +} + +/// Download the prebuilt module zip, verify it (sha512 + the `h1:` dirhash, +/// done by `fetch_verified_archive`), extract it into `copy_dir` (stripping its +/// `{module}@{version}/` prefix), ensure a `go.mod`, and wire the `replace` +/// directive — the same end state `apply_go_redirect` produces, minus the copy +/// + local apply. Maps each service outcome onto the `auto` / `service` policy. +#[allow(clippy::too_many_arguments)] +async fn go_service_redirect( + service: Option<&VendorServiceConfig>, + record: &PatchRecord, + module: &str, + version: &str, + base_rel: &str, + copy_dir: &Path, + project_root: &Path, + warnings: &mut Vec, +) -> GoServiceRedirect { + let Some(cfg) = service else { + return GoServiceRedirect::FallBack; + }; + // An empty-files patch is a degenerate no-op; let the engine's empty + // handling deal with it rather than downloading anything. + if !cfg.service_enabled() || record.files.is_empty() { + return GoServiceRedirect::FallBack; + } + fn hard(code: &'static str, detail: String) -> GoServiceRedirect { + GoServiceRedirect::HardFail(Box::new(VendorOutcome::Refused { code, detail })) + } + let miss = |warnings: &mut Vec, code: &'static str, reason: String| { + if cfg.source.requires_service() { + hard("vendor_prebuilt_required", reason) + } else { + warnings.push(VendorWarning::new(code, format!("{reason}; building locally instead"))); + GoServiceRedirect::FallBack + } + }; + match fetch_verified_archive(cfg, &record.uuid, module).await { + ServiceArtifact::Ready(archive) => { + // Clean copy dir; extract the module zip (strip its literal + // `{module}@{version}/` prefix) into it. 
+ let _ = remove_tree(copy_dir).await; + if let Err(e) = tokio::fs::create_dir_all(copy_dir).await { + return hard( + "vendor_prebuilt_write_failed", + format!("cannot create {}: {e}", copy_dir.display()), + ); + } + let prefix = format!("{module}@{version}/"); + if let Err(e) = extract_zip_with_prefix(&archive.bytes, copy_dir, &prefix) { + let _ = remove_tree(&project_root.join(base_rel)).await; + return hard( + "vendor_prebuilt_extract_failed", + format!("cannot extract the prebuilt module zip: {e}"), + ); + } + // A `replace` target needs a go.mod declaring the module path; + // pre-modules zips may lack one — synthesize the minimal form. + if let Err(e) = ensure_module_go_mod(copy_dir, module).await { + let _ = remove_tree(&project_root.join(base_rel)).await; + return hard( + "vendor_prebuilt_write_failed", + format!("cannot synthesize go.mod for the copy: {e}"), + ); + } + if let Err(e) = + go_mod_edit::ensure_replace_entry(project_root, module, version, base_rel, false) + .await + { + let _ = remove_tree(&project_root.join(base_rel)).await; + return hard( + "vendor_prebuilt_wire_failed", + format!("failed to update go.mod: {e}"), + ); + } + warnings.push(VendorWarning::new( + "vendor_prebuilt_downloaded", + format!("vendored {module} from the patch service ({})", archive.source_url), + )); + GoServiceRedirect::Used + } + ServiceArtifact::IntegrityMismatch(reason) => miss( + warnings, + "vendor_prebuilt_integrity_mismatch", + format!("prebuilt module zip failed integrity ({reason})"), + ), + ServiceArtifact::Pending => miss( + warnings, + "vendor_prebuilt_pending", + "prebuilt module zip is still building".to_string(), + ), + ServiceArtifact::Unavailable(reason) => { + if cfg.source.requires_service() { + hard( + "vendor_prebuilt_required", + format!("prebuilt module zip unavailable: {reason}"), + ) + } else { + GoServiceRedirect::FallBack + } + } + ServiceArtifact::Failed(reason) => miss( + warnings, + "vendor_prebuilt_unavailable", + format!("patch service 
request failed ({reason})"), + ), + } +} + /// Revert one vendored Go module: drop the vendor-owned `replace` directive /// and remove the uuid dir. A taken-over go-patches redirect is **not** /// restored (warned: re-run `socket-patch apply`). @@ -510,6 +702,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -984,4 +1177,223 @@ mod tests { ); assert!(!root.join(format!(".socket/vendor/golang/{UUID}")).exists()); } + + // ─────────────── service-download path (Tier B: golang) ─────────────── + // + // golang vendors a patched module DIRECTORY behind a go.mod `replace`, so + // the service path downloads the prebuilt module zip, verifies it (sha512 + + // the `h1:` dirhash), extracts it into the copy dir, and wires the replace. + + use crate::api::client::{ApiClient, ApiClientOptions}; + use crate::patch::vendor::{VendorServiceConfig, VendorSource}; + + fn sri_sha512(bytes: &[u8]) -> String { + use base64::Engine as _; + use sha2::{Digest as _, Sha512}; + format!( + "sha512-{}", + base64::engine::general_purpose::STANDARD.encode(Sha512::digest(bytes)) + ) + } + + fn go_service_cfg(uri: &str, source: VendorSource, offline: bool) -> VendorServiceConfig { + VendorServiceConfig { + source, + client: Some(ApiClient::new(ApiClientOptions { + api_url: uri.to_string(), + api_token: Some("sktsec_placeholder_value_for_tests_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + })), + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline, + } + } + + /// Build a Go module zip (entries prefixed `{MODULE}@{VERSION}/`). 
+ fn make_module_zip(files: &[(&str, &[u8])]) -> Vec { + use std::io::Write as _; + let mut cursor = std::io::Cursor::new(Vec::new()); + { + let mut zw = zip::ZipWriter::new(&mut cursor); + let opts = zip::write::SimpleFileOptions::default(); + for (rel, content) in files { + zw.start_file(format!("{MODULE}@{VERSION}/{rel}"), opts).unwrap(); + zw.write_all(content).unwrap(); + } + zw.finish().unwrap(); + } + cursor.into_inner() + } + + async fn mount_go_granted( + server: &wiremock::MockServer, + sha512: &str, + dirhash_h1: Option<&str>, + zip_bytes: &[u8], + ) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + let serve_path = format!("/patch/golang/{MODULE}/{VERSION}/tok/{UUID}/bar-{VERSION}.zip"); + let serve_url = format!("{}{serve_path}", server.uri()); + let mut integrity = serde_json::json!({ "sha512": sha512 }); + if let Some(h1) = dirhash_h1 { + integrity["dirhashH1"] = serde_json::Value::from(h1); + } + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { + "status": "granted", + "url": serve_url, + "purl": PURL, + "artifacts": [{ "kind": "tarball", "url": serve_url, "integrity": integrity }] + }} + }))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path(serve_path)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(zip_bytes.to_vec())) + .mount(server) + .await; + } + + async fn mount_go_status(server: &wiremock::MockServer, status: &str) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { "status": status, "url": null, "artifacts": [] } } + }))) + .mount(server) + .await; + } + + /// Service success: the prebuilt module zip is extracted into the copy dir + /// 
(patched bytes), the go.mod `replace` is wired, and a + /// `vendor_prebuilt_downloaded` advisory is emitted — WITHOUT touching the + /// pristine source (a deliberately-missing path). + #[tokio::test] + async fn service_success_extracts_module_and_wires_replace() { + let (dir, blobs, _pristine, record) = fixture().await; + let root = dir.path(); + let zip = make_module_zip(&[ + ("go.mod", b"module github.com/foo/bar\n\ngo 1.21\n"), + ("bar.go", PATCHED), + ]); + let sri = sri_sha512(&zip); + let server = wiremock::MockServer::start().await; + mount_go_granted(&server, &sri, None, &zip).await; + let sources = PatchSources::blobs_only(&blobs); + + let bogus_pristine = root.join("no-such-cache"); + let outcome = vendor_go_module( + PURL, + &bogus_pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&go_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + let (result, entry, warnings) = expect_done(outcome); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + let copy = root.join(copy_rel()); + assert_eq!(tokio::fs::read(copy.join("bar.go")).await.unwrap(), PATCHED); + let entries = read_replace_entries(root).await; + let e = entries.iter().find(|e| e.module == MODULE).expect("replace wired"); + assert_eq!(e.owner, Some(ReplaceOwner::Vendor)); + assert_eq!(e.path.as_deref(), Some(format!("./{}", copy_rel()).as_str())); + assert!(warnings.iter().any(|w| w.code == "vendor_prebuilt_downloaded")); + } + + /// `service` mode + a wrong `h1:` dirhash hard-fails (verifies the + /// golang-specific dirhash check), nothing wired. 
+ #[tokio::test] + async fn service_wrong_dirhash_h1_service_mode_hard_fails() { + let (dir, blobs, pristine, record) = fixture().await; + let root = dir.path(); + let zip = make_module_zip(&[ + ("go.mod", b"module github.com/foo/bar\n\ngo 1.21\n"), + ("bar.go", PATCHED), + ]); + let sri = sri_sha512(&zip); // correct sha512 + let server = wiremock::MockServer::start().await; + mount_go_granted(&server, &sri, Some("h1:bogusdirhashvaluethatwontmatch="), &zip).await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_go_module( + PURL, + &pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&go_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + expect_refused(outcome, "vendor_prebuilt_required"); + assert!(!root.join(format!(".socket/vendor/golang/{UUID}")).exists()); + } + + /// `auto` + a not-built service status falls back to the local build. + #[tokio::test] + async fn service_unavailable_auto_falls_back_to_build() { + let (dir, blobs, pristine, record) = fixture().await; + let root = dir.path(); + let server = wiremock::MockServer::start().await; + mount_go_status(&server, "not_found").await; + let sources = PatchSources::blobs_only(&blobs); + + let outcome = vendor_go_module( + PURL, + &pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&go_service_cfg(&server.uri(), VendorSource::Auto, false)), + ) + .await; + let (result, entry, _) = expect_done(outcome); + assert!(result.success, "auto must fall back to the local build: {:?}", result.error); + assert!(entry.is_some()); + assert_eq!( + tokio::fs::read(root.join(copy_rel()).join("bar.go")).await.unwrap(), + PATCHED + ); + } + + /// `--offline` + `--vendor-source=service` refuses without any network. 
+ #[tokio::test] + async fn offline_service_mode_refuses() { + let (dir, blobs, pristine, record) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + let outcome = vendor_go_module( + PURL, + &pristine, + root, + &record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&go_service_cfg("http://127.0.0.1:1", VendorSource::Service, true)), + ) + .await; + expect_refused(outcome, "vendor_service_offline_conflict"); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/mod.rs b/crates/socket-patch-core/src/patch/vendor/mod.rs index 1906cfc..591094e 100644 --- a/crates/socket-patch-core/src/patch/vendor/mod.rs +++ b/crates/socket-patch-core/src/patch/vendor/mod.rs @@ -69,6 +69,7 @@ pub mod pypi_requirements; pub mod pypi_uv; pub mod pypi_wheel; pub mod registry_fetch; +pub(crate) mod service_fetch; mod toml_surgery; pub mod verify; pub mod yarn_berry_lock; @@ -104,6 +105,93 @@ impl VendorWarning { } } +/// Where `vendor` acquires the installable patched artifact for a package. +/// +/// * `Auto` (default) — try the patch.socket.dev vendoring service first and +/// silently fall back to a local build on any non-fatal miss (offline, +/// pending build, not found, network error). The downloaded bytes are always +/// integrity-verified before use. +/// * `Service` — require the vendoring service; fail closed on a miss. Useful +/// for CI / exercising the service path exclusively. +/// * `Build` — always build the artifact locally (the pre-service behavior; +/// never contacts the vendoring service). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum VendorSource { + #[default] + Auto, + Service, + Build, +} + +impl VendorSource { + /// Short lowercase tag, suitable for JSON output and `--vendor-source` + /// flag values. 
+ pub fn as_tag(&self) -> &'static str { + match self { + VendorSource::Auto => "auto", + VendorSource::Service => "service", + VendorSource::Build => "build", + } + } + + /// Parse a `--vendor-source` / `SOCKET_VENDOR_SOURCE` token (case-insensitive, + /// surrounding whitespace trimmed). + pub fn parse(s: &str) -> Result { + match s.trim().to_ascii_lowercase().as_str() { + "auto" => Ok(VendorSource::Auto), + "service" => Ok(VendorSource::Service), + "build" => Ok(VendorSource::Build), + other => Err(format!( + "unknown vendor source '{other}'. Expected auto, service, or build." + )), + } + } + + /// Whether this mode may contact the vendoring service at all. + pub fn may_use_service(&self) -> bool { + matches!(self, VendorSource::Auto | VendorSource::Service) + } + + /// Whether a service miss must fail closed (no local-build fallback). + pub fn requires_service(&self) -> bool { + matches!(self, VendorSource::Service) + } +} + +/// Everything the vendor backends need to (optionally) download a prebuilt +/// patched archive from the patch.socket.dev vendoring service. +/// +/// Built once per `vendor` run in the CLI and threaded as +/// `Option<&VendorServiceConfig>` through the dispatch chain — `None` means +/// "build-only" (the pre-service behavior), which keeps every caller that +/// doesn't opt in (and every existing test) unchanged. +#[derive(Debug, Clone)] +pub struct VendorServiceConfig { + /// The `auto` / `service` / `build` policy. + pub source: VendorSource, + /// The run-level API client (reused from the CLI). `None` disables the + /// service path even under `auto`/`service` (treated as a miss / refusal). + pub client: Option, + /// True when the client targets the public proxy (tokenless) — drives + /// `freeOnly` on the package-reference request. + pub use_public_proxy: bool, + /// Optional override for the step-1 package-reference base host. 
+ pub vendor_url: Option, + /// Optional override for the step-2 download host (rewrites the host of the + /// server-returned absolute URL). + pub patch_server_url: Option, + /// Strict airgap — never contact the network. + pub offline: bool, +} + +impl VendorServiceConfig { + /// Whether this run may actually attempt a service download right now: + /// the mode permits it, we're online, and a client is configured. + pub fn service_enabled(&self) -> bool { + self.source.may_use_service() && !self.offline && self.client.is_some() + } +} + /// One warning per staged file whose pre-patch content matched NEITHER /// `beforeHash` nor `afterHash` and was overwritten with the verified /// patched content (vendor staging always force-applies — the stage is a @@ -528,6 +616,45 @@ mod policy_tests { } } +#[cfg(test)] +mod vendor_source_tests { + use super::*; + + #[test] + fn parse_accepts_known_tokens_case_insensitively() { + assert_eq!(VendorSource::parse("auto").unwrap(), VendorSource::Auto); + assert_eq!(VendorSource::parse("AUTO").unwrap(), VendorSource::Auto); + assert_eq!(VendorSource::parse(" service ").unwrap(), VendorSource::Service); + assert_eq!(VendorSource::parse("Build").unwrap(), VendorSource::Build); + } + + #[test] + fn parse_rejects_unknown_tokens() { + let err = VendorSource::parse("download").unwrap_err(); + assert!(err.contains("download"), "echoes the bad token: {err}"); + assert!(err.contains("auto, service, or build"), "lists the set: {err}"); + assert!(VendorSource::parse("").is_err()); + } + + #[test] + fn as_tag_round_trips_through_parse() { + for s in [VendorSource::Auto, VendorSource::Service, VendorSource::Build] { + assert_eq!(VendorSource::parse(s.as_tag()).unwrap(), s); + } + } + + #[test] + fn default_is_auto_and_mode_predicates_hold() { + assert_eq!(VendorSource::default(), VendorSource::Auto); + assert!(VendorSource::Auto.may_use_service()); + assert!(VendorSource::Service.may_use_service()); + 
assert!(!VendorSource::Build.may_use_service()); + assert!(VendorSource::Service.requires_service()); + assert!(!VendorSource::Auto.requires_service()); + assert!(!VendorSource::Build.requires_service()); + } +} + #[cfg(test)] mod harvest_tests { use super::*; diff --git a/crates/socket-patch-core/src/patch/vendor/npm_common.rs b/crates/socket-patch-core/src/patch/vendor/npm_common.rs index c9f5a8e..955895f 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_common.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_common.rs @@ -12,19 +12,25 @@ //! the project byte-untouched (a dry run stops after verification and //! creates nothing on disk). +use std::collections::HashMap; use std::path::Path; use serde_json::Value; use crate::manifest::schema::PatchRecord; -use crate::patch::apply::{normalize_file_path, ApplyResult, PatchSources}; +use crate::patch::apply::{ + normalize_file_path, ApplyResult, PatchSources, VerifyResult, VerifyStatus, +}; use crate::patch::copy_tree::{fresh_copy, remove_tree}; +use crate::patch::package::read_archive_to_map; use crate::patch::path_safety; +use crate::utils::fs::atomic_write_bytes; use crate::utils::purl::{percent_decode_purl_component, strip_purl_qualifiers}; use super::npm_pack::{pack_deterministic, PackedTarball}; use super::path::vendor_uuid_dir_rel; -use super::{VendorOutcome, VendorWarning}; +use super::service_fetch::{fetch_verified_archive, ServiceArtifact}; +use super::{VendorOutcome, VendorServiceConfig, VendorWarning}; /// Validated npm vendoring coordinates (the output of /// [`guard_coordinates`]). 
`name`/`version` are the percent-DECODED purl @@ -131,9 +137,32 @@ pub(super) async fn stage_patch_pack( dry_run: bool, force: bool, warnings: &mut Vec, + service: Option<&VendorServiceConfig>, ) -> Result<(Option, ApplyResult), Box> { let coords = guard_coordinates(purl, record)?; + // ── Service-download fast path (Tier A: write the prebuilt tarball) ── + // When the vendoring service is configured, try to download the already- + // built, integrity-verified tarball instead of staging+patching+packing + // locally. A dry run previews the local build (no network). Per the + // `auto`/`service` policy a non-fatal miss falls back to the local build + // below; under `service` it fails closed. + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return Err(Box::new(refused( + "vendor_service_offline_conflict", + "--vendor-source=service needs the network but --offline is set".to_string(), + ))); + } + if cfg.service_enabled() && !dry_run { + match try_service_pack(purl, project_root, &coords, record, cfg, warnings).await { + ServicePackDecision::Used(pair) => return Ok(*pair), + ServicePackDecision::HardFail(outcome) => return Err(outcome), + ServicePackDecision::FallBack => { /* fall through to local build */ } + } + } + } + // ── Stage + patch a private copy ──────────────────────────────────── // The stage lives in a tempdir OUTSIDE the project: nothing inside the // project is written until the patched tarball verifies. @@ -254,6 +283,224 @@ pub(super) async fn stage_patch_pack( )) } +// ───────────────────────── service-download path ───────────────────────── + +/// Outcome of attempting the service-download fast path in [`stage_patch_pack`]. +enum ServicePackDecision { + /// Use the service artifact — the staged pack + a synthesized success. + /// Boxed: the pair is large relative to the other (small) variants. 
+ Used(Box<(Option, ApplyResult)>), + /// Abort vendoring this package (a `service`-mode miss, or a downloaded + /// artifact we could not turn into a staged pack). + HardFail(Box), + /// Fall back to the local stage→patch→pack build. + FallBack, +} + +/// Download + verify the prebuilt tarball and turn it into an [`NpmStagedPack`], +/// mapping each service outcome onto the `auto` / `service` fallback policy. +async fn try_service_pack( + purl: &str, + project_root: &Path, + coords: &NpmCoords, + record: &PatchRecord, + cfg: &VendorServiceConfig, + warnings: &mut Vec, +) -> ServicePackDecision { + let hard_fail = + |detail: String| ServicePackDecision::HardFail(Box::new(done_failure(purl, detail))); + match fetch_verified_archive(cfg, &record.uuid, &coords.name).await { + ServiceArtifact::Ready(archive) => { + match staged_pack_from_service_bytes( + purl, + project_root, + coords, + record, + &archive.bytes, + &archive.integrity_sri, + ) + .await + { + Ok(staged) => { + warnings.push(VendorWarning::new( + "vendor_prebuilt_downloaded", + format!( + "vendored {}@{} from the patch service ({})", + coords.name, coords.version, archive.source_url + ), + )); + let result = + synthesized_service_result(purl, &project_root.join(&staged.rel_tgz), record); + ServicePackDecision::Used(Box::new((Some(staged), result))) + } + Err(outcome) => ServicePackDecision::HardFail(outcome), + } + } + // An artifact that downloaded but failed integrity is NEVER silently + // used; under `auto` we fall back to a fresh local build (loudly). 
+ ServiceArtifact::IntegrityMismatch(reason) => { + if cfg.source.requires_service() { + hard_fail(format!( + "prebuilt artifact failed integrity verification: {reason}" + )) + } else { + warnings.push(VendorWarning::new( + "vendor_prebuilt_integrity_mismatch", + format!( + "prebuilt artifact failed integrity ({reason}); building locally instead" + ), + )); + ServicePackDecision::FallBack + } + } + ServiceArtifact::Pending => { + if cfg.source.requires_service() { + hard_fail("prebuilt artifact is still building".to_string()) + } else { + warnings.push(VendorWarning::new( + "vendor_prebuilt_pending", + "prebuilt artifact is still building; building locally instead".to_string(), + )); + ServicePackDecision::FallBack + } + } + // The common, quiet miss: not built / free-only / not found. + ServiceArtifact::Unavailable(reason) => { + if cfg.source.requires_service() { + hard_fail(format!("prebuilt artifact unavailable: {reason}")) + } else { + ServicePackDecision::FallBack + } + } + ServiceArtifact::Failed(reason) => { + if cfg.source.requires_service() { + hard_fail(format!("patch service request failed: {reason}")) + } else { + warnings.push(VendorWarning::new( + "vendor_prebuilt_unavailable", + format!("patch service request failed ({reason}); building locally instead"), + )); + ServicePackDecision::FallBack + } + } + } +} + +/// Build an [`NpmStagedPack`] from service-downloaded, sha512-verified tarball +/// bytes: write the tarball to the vendor path and (when the patch rewrote +/// `package.json`) extract it for the lockfile's dependency-mirror recompute. +/// +/// Re-derives the [`PackedTarball`] facts from the bytes so the lockfile +/// `integrity` is byte-identical to a local build, and asserts they match the +/// integrity the service vouched for (the caller already verified the bytes +/// against it — this guards the value actually written to the lock). 
+async fn staged_pack_from_service_bytes( + purl: &str, + project_root: &Path, + coords: &NpmCoords, + record: &PatchRecord, + bytes: &[u8], + service_sri: &str, +) -> Result> { + let packed = PackedTarball::from_bytes(bytes); + if packed.integrity != service_sri { + return Err(Box::new(done_failure( + purl, + format!( + "recomputed integrity {} disagrees with the service integrity {service_sri}", + packed.integrity + ), + ))); + } + + let rel_tgz = format!( + "{}/{}", + coords.uuid_dir_rel, + tgz_rel_leaf(&coords.name, &coords.version) + ); + let dest = project_root.join(&rel_tgz); + if let Some(parent) = dest.parent() { + if let Err(e) = tokio::fs::create_dir_all(parent).await { + return Err(Box::new(done_failure( + purl, + format!("cannot create {}: {e}", parent.display()), + ))); + } + } + if let Err(e) = atomic_write_bytes(&dest, bytes).await { + return Err(Box::new(done_failure( + purl, + format!("cannot write the vendored tarball: {e}"), + ))); + } + + let staged_pkg_json = if record + .files + .keys() + .any(|k| normalize_file_path(k) == "package.json") + { + match read_package_json_from_vendored_tgz(&dest).await { + Ok(pkg) => Some(pkg), + Err(e) => return Err(Box::new(done_failure(purl, e))), + } + } else { + None + }; + + Ok(NpmStagedPack { + name: coords.name.clone(), + version: coords.version.clone(), + rel_tgz, + packed, + staged_pkg_json, + }) +} + +/// Read the patched `package.json` out of a written vendored tarball (used +/// only when the patch rewrote it — the lock's dependency mirror is then +/// stale and recomputed from this). +async fn read_package_json_from_vendored_tgz(dest: &Path) -> Result { + let dest = dest.to_path_buf(); + let map = tokio::task::spawn_blocking(move || read_archive_to_map(&dest)) + .await + .map_err(|e| format!("join error reading the vendored tarball: {e}"))? 
+ .map_err(|e| format!("cannot read the vendored tarball: {e}"))?; + let bytes = map.get("package.json").ok_or_else(|| { + "the patch rewrites package.json but the prebuilt artifact has none".to_string() + })?; + serde_json::from_slice(bytes) + .map_err(|e| format!("vendored package.json is not parseable JSON: {e}")) +} + +/// Synthesize a success [`ApplyResult`] for a service-downloaded package: +/// there is no local apply to verify, so every patched file reads as +/// `AlreadyPatched` (trust is the service-verified integrity). Mirrors the +/// in-sync hot path's synthesized result. +fn synthesized_service_result(purl: &str, dest: &Path, record: &PatchRecord) -> ApplyResult { + let files_verified = record + .files + .keys() + .map(|file| VerifyResult { + file: file.clone(), + status: VerifyStatus::AlreadyPatched, + message: None, + current_hash: None, + expected_hash: None, + target_hash: None, + }) + .collect(); + ApplyResult { + package_key: purl.to_string(), + package_path: dest.display().to_string(), + success: true, + files_verified, + files_patched: Vec::new(), + applied_via: HashMap::new(), + error: None, + sidecar: None, + } +} + // ───────────────────────────── small helpers ───────────────────────────── /// `pkg:npm/[@scope/]name@version` → `(name, version)`; scoped names keep diff --git a/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs b/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs index b0efc65..d163b65 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs @@ -273,6 +273,7 @@ pub async fn vendor_npm_any( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&super::VendorServiceConfig>, ) -> VendorOutcome { let (flavor, probe_warnings) = match detect_npm_lock_flavor(project_root).await { Ok(found) => found, @@ -289,6 +290,7 @@ pub async fn vendor_npm_any( vendored_at, dry_run, force, + service, ) .await } @@ -302,6 +304,7 @@ pub async fn 
vendor_npm_any( vendored_at, dry_run, force, + service, ) .await } @@ -315,6 +318,7 @@ pub async fn vendor_npm_any( vendored_at, dry_run, force, + service, ) .await } @@ -328,6 +332,7 @@ pub async fn vendor_npm_any( vendored_at, dry_run, force, + service, ) .await } @@ -341,6 +346,7 @@ pub async fn vendor_npm_any( vendored_at, dry_run, force, + service, ) .await } @@ -713,6 +719,7 @@ mod tests { "2026-06-09T00:00:00Z", false, false, + None, ) .await } diff --git a/crates/socket-patch-core/src/patch/vendor/npm_lock.rs b/crates/socket-patch-core/src/patch/vendor/npm_lock.rs index c10ea29..2db8e5b 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_lock.rs @@ -82,6 +82,7 @@ pub async fn vendor_npm( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&super::VendorServiceConfig>, ) -> VendorOutcome { let mut warnings: Vec = Vec::new(); @@ -176,6 +177,7 @@ pub async fn vendor_npm( dry_run, force, &mut warnings, + service, ) .await { @@ -861,6 +863,7 @@ mod tests { "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -1215,6 +1218,7 @@ mod tests { "2026-06-09T00:00:00Z", false, /*force=*/ true, + None, ) .await; let (result, entry, _) = expect_done(outcome); @@ -1832,4 +1836,307 @@ mod tests { assert_eq!(escape_json_pointer_token("@scope/name"), "@scope~1name"); assert_eq!(escape_json_pointer_token("a~b"), "a~0b"); } + + // ─────────────── service-download path (Tier A: npm) ─────────────── + // + // Both halves of the contract are exercised: the service-backed download + // AND the local-build fallback, against a `wiremock` stand-in for the + // patch.socket.dev two-step (package-reference POST + serve GET). 
+ + use crate::api::client::{ApiClient, ApiClientOptions}; + use crate::patch::vendor::{VendorServiceConfig, VendorSource}; + + const SERVE_PATH: &str = "/patch/npm/left-pad/1.3.0/grant-tok/uuid/left-pad-1.3.0.tgz"; + + fn service_cfg(server_uri: &str, source: VendorSource, offline: bool) -> VendorServiceConfig { + VendorServiceConfig { + source, + client: Some(ApiClient::new(ApiClientOptions { + api_url: server_uri.to_string(), + api_token: Some("sktsec_placeholder_value_for_tests_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + })), + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline, + } + } + + async fn vendor_service(fx: &Fixture, cfg: &VendorServiceConfig) -> VendorOutcome { + let blobs = fx.root().join(".socket/blobs"); + let sources = PatchSources::blobs_only(&blobs); + vendor_npm( + &fx.purl(), + &fx.installed(), + fx.root(), + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(cfg), + ) + .await + } + + /// The deterministic tgz a LOCAL build yields for the fixture's patch + /// (vendored in a throwaway copy), plus its sha512 SRI — the bytes the + /// service is made to serve so integrity matches by construction. 
+ async fn locally_built_artifact() -> (Vec, String) { + let fx = fixture().await; + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + let tgz = tokio::fs::read(fx.root().join(fx.expected_rel_tgz())) + .await + .unwrap(); + let sri = sri_sha512(&tgz); + (tgz, sri) + } + + async fn mount_granted(server: &wiremock::MockServer, sha512: &str, tgz: &[u8]) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + let serve_url = format!("{}{SERVE_PATH}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { + "status": "granted", + "url": serve_url, + "purl": "pkg:npm/left-pad@1.3.0", + "artifacts": [{ "kind": "tarball", "url": serve_url, + "integrity": { "sha512": sha512 } }] + }} + }))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path(SERVE_PATH)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(tgz.to_vec())) + .mount(server) + .await; + } + + async fn mount_status_only(server: &wiremock::MockServer, status: &str) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "results": { UUID: { "status": status, "url": null, "artifacts": [] } } + }))) + .mount(server) + .await; + } + + /// Mount a POST mock asserting the service is NEVER contacted. 
+ async fn mount_post_never(server: &wiremock::MockServer) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(500)) + .expect(0) + .mount(server) + .await; + } + + fn lock_integrity(lock: &Value, key: &str) -> String { + lock["packages"][key]["integrity"] + .as_str() + .unwrap_or_default() + .to_string() + } + + /// Service success: the prebuilt tarball is written verbatim, the lock is + /// rewired to the service integrity, the ledger describes the bytes, and a + /// `vendor_prebuilt_downloaded` advisory is emitted. Because the served + /// bytes ARE the local-build bytes, this also proves byte-for-byte parity + /// between the two paths. + #[tokio::test] + async fn service_success_writes_tgz_and_rewires_lock() { + let (served, sri) = locally_built_artifact().await; + let server = wiremock::MockServer::start().await; + mount_granted(&server, &sri, &served).await; + + let fx = fixture().await; + let outcome = vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Service, false)).await; + let (result, entry, warnings) = expect_done(outcome); + assert!(result.success, "{:?}", result.error); + let entry = entry.expect("service vendor must carry a ledger entry"); + + let on_disk = tokio::fs::read(fx.root().join(fx.expected_rel_tgz())) + .await + .unwrap(); + assert_eq!(on_disk, served, "service tgz written byte-for-byte"); + assert_eq!(entry.artifact.sha256, hex::encode(sha2::Sha256::digest(&served))); + assert_eq!(entry.artifact.size, Some(served.len() as u64)); + + let lock = fx.read_lock().await; + for key in [ + "node_modules/left-pad", + "node_modules/foo/node_modules/left-pad", + ] { + assert_eq!(lock_integrity(&lock, key), sri, "{key}: lock integrity = service sha512"); + assert_eq!( + lock["packages"][key]["resolved"], + json!(format!("file:{}", fx.expected_rel_tgz())), + "{key}: resolved rewired to the 
vendored tarball" + ); + } + assert!( + warnings.iter().any(|w| w.code == "vendor_prebuilt_downloaded"), + "expected a vendor_prebuilt_downloaded advisory, got {warnings:?}" + ); + } + + /// `service` mode + a downloaded artifact that fails integrity = hard fail, + /// project byte-untouched (no tgz, lock unchanged). + #[tokio::test] + async fn service_integrity_mismatch_service_mode_hard_fails() { + let (served, _) = locally_built_artifact().await; + let wrong = sri_sha512(b"not the real tarball"); + let server = wiremock::MockServer::start().await; + mount_granted(&server, &wrong, &served).await; + + let fx = fixture().await; + let before = tokio::fs::read(fx.lock_path()).await.unwrap(); + let (result, _entry, _) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Service, false)).await); + assert!(!result.success, "integrity mismatch under `service` must fail"); + assert_eq!( + tokio::fs::read(fx.lock_path()).await.unwrap(), + before, + "lock must be byte-untouched on a hard fail" + ); + assert!( + !fx.root().join(fx.expected_rel_tgz()).exists(), + "no tarball must be written on a hard fail" + ); + } + + /// `auto` + integrity mismatch falls back to a local build (loudly): the + /// lock ends up rewired to the LOCALLY-recomputed integrity, not the bad + /// service value. 
+ #[tokio::test] + async fn service_integrity_mismatch_auto_falls_back_to_build() { + let (served, _) = locally_built_artifact().await; + let wrong = sri_sha512(b"not the real tarball"); + let server = wiremock::MockServer::start().await; + mount_granted(&server, &wrong, &served).await; + + let fx = fixture().await; + let (result, entry, warnings) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Auto, false)).await); + assert!(result.success, "auto must fall back to a successful build: {:?}", result.error); + assert!(entry.is_some()); + let on_disk = tokio::fs::read(fx.root().join(fx.expected_rel_tgz())) + .await + .unwrap(); + let local_sri = sri_sha512(&on_disk); + assert_eq!( + lock_integrity(&fx.read_lock().await, "node_modules/left-pad"), + local_sri, + "fallback build's integrity, not the bad service value" + ); + assert!( + warnings.iter().any(|w| w.code == "vendor_prebuilt_integrity_mismatch"), + "expected a vendor_prebuilt_integrity_mismatch advisory, got {warnings:?}" + ); + } + + /// `auto` + pending_build falls back to a local build (with an advisory). + #[tokio::test] + async fn service_pending_build_auto_falls_back() { + let server = wiremock::MockServer::start().await; + mount_status_only(&server, "pending_build").await; + + let fx = fixture().await; + let (result, entry, warnings) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Auto, false)).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + assert!(fx.root().join(fx.expected_rel_tgz()).exists()); + assert!(warnings.iter().any(|w| w.code == "vendor_prebuilt_pending")); + } + + /// `service` mode + pending_build hard-fails (no fallback). 
+ #[tokio::test] + async fn service_pending_build_service_mode_hard_fails() { + let server = wiremock::MockServer::start().await; + mount_status_only(&server, "pending_build").await; + + let fx = fixture().await; + let (result, _, _) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Service, false)).await); + assert!(!result.success); + assert!(!fx.root().join(fx.expected_rel_tgz()).exists()); + } + + /// `auto` + not_found falls back QUIETLY (the common "not built / free-only" + /// case must not emit a loud `vendor_prebuilt_*` advisory). + #[tokio::test] + async fn service_not_found_auto_falls_back_quietly() { + let server = wiremock::MockServer::start().await; + mount_status_only(&server, "not_found").await; + + let fx = fixture().await; + let (result, entry, warnings) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Auto, false)).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + assert!( + !warnings.iter().any(|w| w.code.starts_with("vendor_prebuilt_")), + "a not_found miss must be quiet, got {warnings:?}" + ); + } + + /// `--offline` + `auto`: the service is NEVER contacted; the local build runs. + #[tokio::test] + async fn offline_auto_does_not_call_service() { + let server = wiremock::MockServer::start().await; + mount_post_never(&server).await; + + let fx = fixture().await; + let (result, entry, _) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Auto, true)).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + // `mount_post_never`'s `.expect(0)` is verified on `server` drop. + } + + /// `--vendor-source=build`: the service is NEVER contacted; the local build runs. 
+ #[tokio::test] + async fn build_mode_does_not_call_service() { + let server = wiremock::MockServer::start().await; + mount_post_never(&server).await; + + let fx = fixture().await; + let (result, entry, _) = + expect_done(vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Build, false)).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + } + + /// `--offline` + `--vendor-source=service` is an irreconcilable request: + /// refuse loudly, touch nothing, never hit the network. + #[tokio::test] + async fn offline_service_mode_refuses() { + let server = wiremock::MockServer::start().await; + mount_post_never(&server).await; + + let fx = fixture().await; + let before = tokio::fs::read(fx.lock_path()).await.unwrap(); + match vendor_service(&fx, &service_cfg(&server.uri(), VendorSource::Service, true)).await { + VendorOutcome::Refused { code, .. } => { + assert_eq!(code, "vendor_service_offline_conflict"); + } + other => panic!("expected Refused, got {other:?}"), + } + assert_eq!(tokio::fs::read(fx.lock_path()).await.unwrap(), before); + assert!(!fx.root().join(fx.expected_rel_tgz()).exists()); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/npm_pack.rs b/crates/socket-patch-core/src/patch/vendor/npm_pack.rs index e8cfe89..733eeb5 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_pack.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_pack.rs @@ -38,6 +38,28 @@ pub struct PackedTarball { pub size: u64, } +impl PackedTarball { + /// Compute the tarball's identity facts (sha512 SRI / sha256 / sha1 / size) + /// from its bytes, writing nothing. + /// + /// Single home for the hash formulas so a locally-packed tarball + /// ([`pack_deterministic`]) and a service-downloaded one + /// ([`super::npm_common::staged_pack_from_service_bytes`]) describe + /// themselves identically — the lockfile `integrity` is byte-for-byte the + /// same whichever path produced the bytes. 
+ pub fn from_bytes(bytes: &[u8]) -> PackedTarball { + PackedTarball { + integrity: format!( + "sha512-{}", + base64::engine::general_purpose::STANDARD.encode(Sha512::digest(bytes)) + ), + sha256_hex: hex::encode(Sha256::digest(bytes)), + sha1_hex: hex::encode(Sha1::digest(bytes)), + size: bytes.len() as u64, + } + } +} + /// Pack every regular file under `staged_dir` into an npm-conventional /// `package/`-prefixed tar.gz at `dest`, deterministically (see module docs). /// @@ -55,16 +77,7 @@ pub async fn pack_deterministic(staged_dir: &Path, dest: &Path) -> std::io::Resu atomic_write_bytes(dest, &bytes).await?; - let integrity = format!( - "sha512-{}", - base64::engine::general_purpose::STANDARD.encode(Sha512::digest(&bytes)) - ); - Ok(PackedTarball { - integrity, - sha256_hex: hex::encode(Sha256::digest(&bytes)), - sha1_hex: hex::encode(Sha1::digest(&bytes)), - size: bytes.len() as u64, - }) + Ok(PackedTarball::from_bytes(&bytes)) } /// Build the deterministic tar.gz in memory (vendored packages are small — @@ -350,4 +363,27 @@ mod tests { assert!(!name.starts_with(".socket-stage-"), "stage litter: {name}"); } } + + /// The DRY invariant the service-download path depends on: a locally-packed + /// tarball's returned facts are exactly `PackedTarball::from_bytes` of the + /// bytes that landed on disk. So a service-downloaded tarball that hashes + /// the same describes itself identically (same lockfile `integrity`). 
+ #[tokio::test] + async fn pack_deterministic_result_equals_from_bytes_of_written_file() { + let tmp = tempfile::tempdir().unwrap(); + let stage = tmp.path().join("stage"); + build_stage(&stage).await; + let dest = tmp.path().join("pkg.tgz"); + + let packed = pack_deterministic(&stage, &dest).await.unwrap(); + let written = tokio::fs::read(&dest).await.unwrap(); + let recomputed = PackedTarball::from_bytes(&written); + + assert_eq!(packed.integrity, recomputed.integrity); + assert_eq!(packed.sha256_hex, recomputed.sha256_hex); + assert_eq!(packed.sha1_hex, recomputed.sha1_hex); + assert_eq!(packed.size, recomputed.size); + assert!(packed.integrity.starts_with("sha512-")); + assert_eq!(packed.size, written.len() as u64); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs index 01e7151..8aef1d9 100644 --- a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs @@ -85,6 +85,7 @@ pub async fn vendor_pnpm( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&super::VendorServiceConfig>, ) -> VendorOutcome { let mut warnings: Vec = Vec::new(); @@ -169,6 +170,7 @@ pub async fn vendor_pnpm( dry_run, force, &mut warnings, + service, ) .await { @@ -2120,6 +2122,7 @@ snapshots: "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } @@ -2759,6 +2762,7 @@ snapshots: "2026-06-09T00:00:00Z", false, false, + None, ) .await; let (r2, e2, _) = expect_done(outcome); diff --git a/crates/socket-patch-core/src/patch/vendor/pypi.rs b/crates/socket-patch-core/src/patch/vendor/pypi.rs index b9b31f3..cea1c09 100644 --- a/crates/socket-patch-core/src/patch/vendor/pypi.rs +++ b/crates/socket-patch-core/src/patch/vendor/pypi.rs @@ -8,9 +8,12 @@ use std::path::Path; +use sha2::{Digest as _, Sha256}; + use crate::crawlers::python_crawler::canonicalize_pypi_name; use crate::manifest::schema::PatchRecord; use 
crate::patch::apply::{ApplyResult, PatchSources, VerifyResult, VerifyStatus}; +use crate::utils::fs::atomic_write_bytes; use crate::utils::purl::{parse_pypi_purl, strip_purl_qualifiers}; use super::path::vendor_uuid_dir_rel; @@ -22,12 +25,13 @@ use super::pypi_uv::{ check_target_guards, classify_dependency, load_uv_project, revert_uv, wire_uv, UvDepClass, UvProject, UvTarget, }; -use super::pypi_wheel::{build_patched_wheel, locate_installed_dist, wheel_file_name}; +use super::pypi_wheel::{build_patched_wheel, locate_installed_dist, wheel_file_name, WheelArtifact}; +use super::service_fetch::{fetch_verified_archive, ServiceArtifact}; use super::state::{ write_marker, PdmMeta, PipenvMeta, PoetryMeta, UvMeta, VendorArtifact, VendorEntry, VendorMarker, }; -use super::{RevertOutcome, VendorOutcome, VendorWarning}; +use super::{RevertOutcome, VendorOutcome, VendorServiceConfig, VendorWarning}; /// Which wiring backend serves this project. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -258,6 +262,22 @@ fn in_sync_outcome( record: &PatchRecord, warnings: Vec, ) -> VendorOutcome { + VendorOutcome::Done { + result: synthesized_apply_result(base_purl, record, String::new()), + entry: None, + warnings, + } +} + +/// A synthesized success [`ApplyResult`] in which every patched file reads as +/// `AlreadyPatched` — used by the in-sync hot path AND the service-download +/// path (where there is no local apply to verify; trust is the service-verified +/// integrity). 
+fn synthesized_apply_result( + base_purl: &str, + record: &PatchRecord, + package_path: String, +) -> ApplyResult { let files_verified = record .files .keys() @@ -270,19 +290,15 @@ fn in_sync_outcome( target_hash: None, }) .collect(); - VendorOutcome::Done { - result: ApplyResult { - package_key: base_purl.to_string(), - package_path: String::new(), - success: true, - files_verified, - files_patched: Vec::new(), - applied_via: std::collections::HashMap::new(), - error: None, - sidecar: None, - }, - entry: None, - warnings, + ApplyResult { + package_key: base_purl.to_string(), + package_path, + success: true, + files_verified, + files_patched: Vec::new(), + applied_via: std::collections::HashMap::new(), + error: None, + sidecar: None, } } @@ -299,6 +315,7 @@ pub async fn vendor_pypi( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&VendorServiceConfig>, ) -> VendorOutcome { // The purl may carry `?artifact_id=` variant qualifiers; everything here // keys off the qualifier-free base. @@ -420,32 +437,34 @@ pub async fn vendor_pypi( } } - let dist = match locate_installed_dist(site_packages, raw_name, version).await { - Ok(d) => d, - Err((code, detail)) => return VendorOutcome::Refused { code, detail }, - }; - let wheel_name = match wheel_file_name(&dist) { - Ok(n) => n, - Err((code, detail)) => return VendorOutcome::Refused { code, detail }, - }; - let rel_wheel = format!("{uuid_dir_rel}/{wheel_name}"); - let dest = project_root.join(&uuid_dir_rel).join(&wheel_name); - - let built = build_patched_wheel( + // Acquire the patched wheel: prefer the prebuilt service artifact (which + // skips needing the package installed), else build it locally. A refusal / + // hard fail bubbles as a terminal outcome. 
+ let AcquiredWheel { + wheel_name, + rel_wheel, + result, + artifact, + platform_locked, + platform_tags_display, + } = match acquire_patched_wheel( base, + raw_name, + version, site_packages, - &dist, + &uuid_dir_rel, + project_root, record, sources, - &dest, dry_run, force, + service, &mut warnings, ) - .await; - let (result, artifact) = match built { - Ok(pair) => pair, - Err((code, detail)) => return VendorOutcome::Refused { code, detail }, + .await + { + Ok(a) => a, + Err(outcome) => return outcome, }; if dry_run || !result.success { return VendorOutcome::Done { @@ -468,7 +487,6 @@ pub async fn vendor_pypi( // A compiled-extension wheel (cp311/manylinux tags) only installs on this // platform, where the registry offered wheels for many — surface it. - let platform_locked = dist.wheel_tags.iter().any(|t| tag_is_platform_specific(t)); if platform_locked { let per_flavor = match flavor { PypiFlavor::UvProject => "uv.lock now resolves it from this single-platform wheel only", @@ -487,8 +505,7 @@ pub async fn vendor_pypi( "vendor_platform_locked", format!( "the vendored wheel for {canon_name}=={version} is platform-specific \ - ({}); {per_flavor}", - dist.wheel_tags.join(", ") + ({platform_tags_display}); {per_flavor}" ), )); } @@ -708,6 +725,243 @@ pub async fn revert_pypi(entry: &VendorEntry, project_root: &Path, dry_run: bool outcome } +/// The patched wheel plus the facts the wiring + ledger need, however it was +/// acquired (service download or local build). +struct AcquiredWheel { + wheel_name: String, + rel_wheel: String, + result: ApplyResult, + /// `None` on a dry run or a failed build (the caller short-circuits). + artifact: Option, + platform_locked: bool, + /// Tag list for the `vendor_platform_locked` advisory. + platform_tags_display: String, +} + +/// Acquire the patched wheel: prefer the prebuilt service artifact (which does +/// not require the package to be installed), else build it locally from the +/// installed dist. 
Returns `Err(outcome)` with the terminal `VendorOutcome` to +/// bubble (a refusal, or a `service`-mode miss). +#[allow(clippy::too_many_arguments)] +async fn acquire_patched_wheel( + base: &str, + raw_name: &str, + version: &str, + site_packages: &Path, + uuid_dir_rel: &str, + project_root: &Path, + record: &PatchRecord, + sources: &PatchSources<'_>, + dry_run: bool, + force: bool, + service: Option<&VendorServiceConfig>, + warnings: &mut Vec, +) -> Result { + if let Some(cfg) = service { + if cfg.source.requires_service() && cfg.offline { + return Err(VendorOutcome::Refused { + code: "vendor_service_offline_conflict", + detail: "--vendor-source=service needs the network but --offline is set" + .to_string(), + }); + } + // A dry run previews the local build; the service is only consulted for + // a real vendor. + if cfg.service_enabled() && !dry_run { + match try_pypi_service_wheel( + base, + raw_name, + uuid_dir_rel, + project_root, + record, + cfg, + warnings, + ) + .await + { + PypiServiceWheel::Used(acq) => return Ok(*acq), + PypiServiceWheel::HardFail(outcome) => return Err(*outcome), + PypiServiceWheel::FallBack => {} + } + } + } + + // Local build from the installed dist. 
+ let dist = match locate_installed_dist(site_packages, raw_name, version).await { + Ok(d) => d, + Err((code, detail)) => return Err(VendorOutcome::Refused { code, detail }), + }; + let wheel_name = match wheel_file_name(&dist) { + Ok(n) => n, + Err((code, detail)) => return Err(VendorOutcome::Refused { code, detail }), + }; + let rel_wheel = format!("{uuid_dir_rel}/{wheel_name}"); + let dest = project_root.join(uuid_dir_rel).join(&wheel_name); + let platform_locked = dist.wheel_tags.iter().any(|t| tag_is_platform_specific(t)); + let platform_tags_display = dist.wheel_tags.join(", "); + let (result, artifact) = match build_patched_wheel( + base, + site_packages, + &dist, + record, + sources, + &dest, + dry_run, + force, + warnings, + ) + .await + { + Ok(pair) => pair, + Err((code, detail)) => return Err(VendorOutcome::Refused { code, detail }), + }; + Ok(AcquiredWheel { + wheel_name, + rel_wheel, + result, + artifact, + platform_locked, + platform_tags_display, + }) +} + +/// Outcome of attempting a pypi service download. +enum PypiServiceWheel { + /// Boxed: the wheel facts are large relative to the other variants. + Used(Box), + /// Bubble this terminal outcome (a `service`-mode miss, or a write failure). + HardFail(Box), + /// Fall back to the local build. + FallBack, +} + +/// Download + verify the prebuilt wheel for `record.uuid`, mapping each service +/// outcome onto the `auto` / `service` policy. Only `.whl` artifacts are usable +/// (pypi vendoring is wheel-based); an sdist (or any miss) is a fallback under +/// `auto` and a hard fail under `service`. +async fn try_pypi_service_wheel( + base: &str, + name: &str, + uuid_dir_rel: &str, + project_root: &Path, + record: &PatchRecord, + cfg: &VendorServiceConfig, + warnings: &mut Vec, +) -> PypiServiceWheel { + // A terminal `service`-mode refusal (boxed — the enum's other variants are + // small). A nested fn so both `miss` and the write-failure sites can use it. 
+ fn hard_fail(code: &'static str, detail: String) -> PypiServiceWheel { + PypiServiceWheel::HardFail(Box::new(VendorOutcome::Refused { code, detail })) + } + // service-required → hard fail; `auto` → warn + fall back to the local build. + let miss = |warnings: &mut Vec, code: &'static str, reason: String| { + if cfg.source.requires_service() { + hard_fail("vendor_prebuilt_required", reason) + } else { + warnings.push(VendorWarning::new(code, format!("{reason}; building locally instead"))); + PypiServiceWheel::FallBack + } + }; + + match fetch_verified_archive(cfg, &record.uuid, name).await { + ServiceArtifact::Ready(archive) => { + let Some(wheel_name) = wheel_filename_from_url(&archive.source_url) else { + return miss( + warnings, + "vendor_prebuilt_unavailable", + "the prebuilt artifact is not a .whl (pypi vendoring is wheel-based)" + .to_string(), + ); + }; + let rel_wheel = format!("{uuid_dir_rel}/{wheel_name}"); + let dest = project_root.join(uuid_dir_rel).join(&wheel_name); + if let Some(parent) = dest.parent() { + if let Err(e) = tokio::fs::create_dir_all(parent).await { + return hard_fail( + "vendor_prebuilt_write_failed", + format!("cannot create {}: {e}", parent.display()), + ); + } + } + if let Err(e) = atomic_write_bytes(&dest, &archive.bytes).await { + return hard_fail( + "vendor_prebuilt_write_failed", + format!("cannot write the vendored wheel: {e}"), + ); + } + let (platform_locked, platform_tags_display) = + wheel_platform_from_filename(&wheel_name); + warnings.push(VendorWarning::new( + "vendor_prebuilt_downloaded", + format!( + "vendored the wheel for {base} from the patch service ({})", + archive.source_url + ), + )); + PypiServiceWheel::Used(Box::new(AcquiredWheel { + rel_wheel, + result: synthesized_apply_result(base, record, dest.display().to_string()), + artifact: Some(WheelArtifact { + file_name: wheel_name.clone(), + sha256_hex: hex::encode(Sha256::digest(&archive.bytes)), + size: archive.bytes.len() as u64, + }), + wheel_name, + 
platform_locked, + platform_tags_display, + })) + } + ServiceArtifact::IntegrityMismatch(reason) => miss( + warnings, + "vendor_prebuilt_integrity_mismatch", + format!("prebuilt wheel failed integrity ({reason})"), + ), + ServiceArtifact::Pending => miss( + warnings, + "vendor_prebuilt_pending", + "prebuilt wheel is still building".to_string(), + ), + // Quiet under `auto` (the common "not built / free-only" case). + ServiceArtifact::Unavailable(reason) => { + if cfg.source.requires_service() { + hard_fail( + "vendor_prebuilt_required", + format!("prebuilt wheel unavailable: {reason}"), + ) + } else { + PypiServiceWheel::FallBack + } + } + ServiceArtifact::Failed(reason) => miss( + warnings, + "vendor_prebuilt_unavailable", + format!("patch service request failed ({reason})"), + ), + } +} + +/// The last path segment of a serve URL, when it names a `.whl`. +fn wheel_filename_from_url(url: &str) -> Option { + let path = url.split(['?', '#']).next().unwrap_or(url); + let name = path.rsplit('/').next().unwrap_or(""); + name.ends_with(".whl").then(|| name.to_string()) +} + +/// Derive `(platform_locked, display)` from a wheel filename's trailing tag +/// triple (`{name}-{ver}(-{build})?-{py}-{abi}-{plat}.whl`). Advisory only — +/// the local-build path reads the same from the dist's WHEEL metadata. +fn wheel_platform_from_filename(wheel_name: &str) -> (bool, String) { + let stem = wheel_name.strip_suffix(".whl").unwrap_or(wheel_name); + let parts: Vec<&str> = stem.split('-').collect(); + if parts.len() >= 3 { + let triple = parts[parts.len() - 3..].join("-"); + (tag_is_platform_specific(&triple), triple) + } else { + // Unparseable → cannot prove portability. + (true, stem.to_string()) + } +} + /// Platform-specific iff the tag triple binds an ABI or platform — `cp311- /// none-any` is merely version-bound, `*-cp311-*` / `*-manylinux*` lock the /// artifact to this machine's platform. 
@@ -947,6 +1201,7 @@ mod tests { "2026-06-09T00:00:00Z", false, false, + None, ) .await; let VendorOutcome::Done { @@ -1087,6 +1342,7 @@ wheels = [ "2026-06-09T00:00:00Z", dry_run, false, + None, ) }; @@ -1165,6 +1421,7 @@ wheels = [ "2026-06-09T00:00:00Z", false, false, + None, ) .await; let VendorOutcome::Refused { code, .. } = outcome else { @@ -1193,6 +1450,7 @@ wheels = [ "2026-06-09T00:00:00Z", true, false, + None, ) .await; let VendorOutcome::Done { result, entry, .. } = outcome else { @@ -1223,6 +1481,7 @@ wheels = [ "2026-06-09T00:00:00Z", false, false, + None, ) .await; let VendorOutcome::Refused { code, .. } = outcome else { @@ -1255,6 +1514,7 @@ wheels = [ "2026-06-09T00:00:00Z", false, false, + None, ) .await; let VendorOutcome::Done { @@ -1319,4 +1579,240 @@ wheels = [ assert!(!outcome.success); assert!(outcome.error.unwrap().contains("mystery")); } + + // ─────────────── service-download path (Tier A: pypi) ─────────────── + // + // The wheel is opaque bytes to the vendor wiring (it embeds the filename + + // a recomputed sha256), so these serve arbitrary bytes under a `.whl` + // filename with a matching sha512. Both the service path AND the + // local-build fallback are exercised. 
+ + use crate::api::client::{ApiClient, ApiClientOptions}; + use crate::patch::vendor::{VendorServiceConfig, VendorSource}; + + const WHEEL_NAME: &str = "six-1.16.0-py2.py3-none-any.whl"; + + fn sri_sha512(bytes: &[u8]) -> String { + use base64::Engine as _; + format!( + "sha512-{}", + base64::engine::general_purpose::STANDARD.encode(sha2::Sha512::digest(bytes)) + ) + } + + fn pypi_service_cfg(server_uri: &str, source: VendorSource, offline: bool) -> VendorServiceConfig { + VendorServiceConfig { + source, + client: Some(ApiClient::new(ApiClientOptions { + api_url: server_uri.to_string(), + api_token: Some("sktsec_placeholder_value_for_tests_api".into()), + use_public_proxy: false, + org_slug: Some("acme".into()), + })), + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline, + } + } + + /// Mount the two-step service for an artifact served at `filename` + /// (`.whl` → usable, `.tar.gz` → sdist fallback) with the given sha512. + async fn mount_pypi_granted( + server: &wiremock::MockServer, + filename: &str, + sha512: &str, + bytes: &[u8], + ) { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, ResponseTemplate}; + let serve_path = format!("/patch/pypi/six/1.16.0/tok/uuid/{filename}"); + let serve_url = format!("{}{serve_path}", server.uri()); + Mock::given(method("POST")) + .and(path("/v0/orgs/acme/patches/package")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "results": { UUID: { + "status": "granted", + "url": serve_url, + "purl": "pkg:pypi/six@1.16.0", + "artifacts": [{ "kind": "tarball", "url": serve_url, + "integrity": { "sha512": sha512 } }] + }} + }))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path(serve_path)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(bytes.to_vec())) + .mount(server) + .await; + } + + /// Service success (requirements flavor): the prebuilt wheel is written, the + /// requirements line is wired to the RECOMPUTED sha256, and a + 
/// `vendor_prebuilt_downloaded` advisory is emitted. + #[tokio::test] + async fn service_success_requirements_writes_wheel_and_wires_sha256() { + let fx = e2e_fixture().await; + let sources = PatchSources::blobs_only(&fx.blobs); + let bytes = b"prebuilt wheel bytes from the service"; + let sri = sri_sha512(bytes); + let server = wiremock::MockServer::start().await; + mount_pypi_granted(&server, WHEEL_NAME, &sri, bytes).await; + + let outcome = vendor_pypi( + "pkg:pypi/six@1.16.0", + &fx.site_packages, + &fx.root, + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&pypi_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + let VendorOutcome::Done { result, entry, warnings } = outcome else { + panic!("expected Done, got {outcome:?}"); + }; + assert!(result.success, "{:?}", result.error); + let entry = entry.expect("entry on success"); + + let wheel_rel = format!(".socket/vendor/pypi/{UUID}/{WHEEL_NAME}"); + assert_eq!(entry.artifact.path, wheel_rel); + let on_disk = tokio::fs::read(fx.root.join(&wheel_rel)).await.unwrap(); + assert_eq!(on_disk, bytes, "service wheel written byte-for-byte"); + let expected_sha256 = hex::encode(sha2::Sha256::digest(bytes)); + assert_eq!(entry.artifact.sha256, expected_sha256); + let req = tokio::fs::read_to_string(fx.root.join("requirements.txt")) + .await + .unwrap(); + assert!( + req.contains(&format!("--hash=sha256:{expected_sha256}")), + "requirements line wired to the recomputed sha256: {req}" + ); + assert!(warnings.iter().any(|w| w.code == "vendor_prebuilt_downloaded")); + // site-packages untouched (the service path never needs the install). + assert_eq!( + tokio::fs::read(fx.site_packages.join("six.py")).await.unwrap(), + ORIG + ); + } + + /// An sdist service artifact (not a `.whl`) falls back to the local wheel + /// build under `auto` — pypi vendoring is wheel-based. 
+ #[tokio::test] + async fn service_sdist_artifact_auto_falls_back_to_build() { + let fx = e2e_fixture().await; + let sources = PatchSources::blobs_only(&fx.blobs); + let bytes = b"sdist tarball bytes"; + let sri = sri_sha512(bytes); + let server = wiremock::MockServer::start().await; + mount_pypi_granted(&server, "six-1.16.0.tar.gz", &sri, bytes).await; + + let outcome = vendor_pypi( + "pkg:pypi/six@1.16.0", + &fx.site_packages, + &fx.root, + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&pypi_service_cfg(&server.uri(), VendorSource::Auto, false)), + ) + .await; + let VendorOutcome::Done { result, entry, .. } = outcome else { + panic!("expected Done (local build), got {outcome:?}"); + }; + assert!(result.success, "auto must fall back to the local wheel build: {:?}", result.error); + let entry = entry.expect("entry on success"); + // The locally-built wheel landed (not the sdist bytes). + let wheel_rel = format!(".socket/vendor/pypi/{UUID}/{WHEEL_NAME}"); + assert_eq!(entry.artifact.path, wheel_rel); + assert!(fx.root.join(&wheel_rel).exists()); + } + + /// `service` mode + an sdist (non-wheel) artifact hard-fails. + #[tokio::test] + async fn service_sdist_artifact_service_mode_hard_fails() { + let fx = e2e_fixture().await; + let sources = PatchSources::blobs_only(&fx.blobs); + let bytes = b"sdist tarball bytes"; + let sri = sri_sha512(bytes); + let server = wiremock::MockServer::start().await; + mount_pypi_granted(&server, "six-1.16.0.tar.gz", &sri, bytes).await; + + let outcome = vendor_pypi( + "pkg:pypi/six@1.16.0", + &fx.site_packages, + &fx.root, + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&pypi_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + assert!( + matches!(outcome, VendorOutcome::Refused { .. }), + "service mode must refuse a non-wheel artifact, got {outcome:?}" + ); + } + + /// `service` mode + an integrity mismatch hard-fails (nothing written). 
+ #[tokio::test] + async fn service_integrity_mismatch_service_mode_hard_fails() { + let fx = e2e_fixture().await; + let sources = PatchSources::blobs_only(&fx.blobs); + let bytes = b"the real wheel bytes"; + let wrong = sri_sha512(b"different bytes entirely"); + let server = wiremock::MockServer::start().await; + mount_pypi_granted(&server, WHEEL_NAME, &wrong, bytes).await; + + let outcome = vendor_pypi( + "pkg:pypi/six@1.16.0", + &fx.site_packages, + &fx.root, + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + Some(&pypi_service_cfg(&server.uri(), VendorSource::Service, false)), + ) + .await; + assert!(matches!(outcome, VendorOutcome::Refused { .. }), "got {outcome:?}"); + assert!( + !fx.root.join(format!(".socket/vendor/pypi/{UUID}/{WHEEL_NAME}")).exists(), + "nothing written on a hard fail" + ); + } + + /// `--offline` + `--vendor-source=service` refuses, never hitting the network. + #[tokio::test] + async fn offline_service_mode_refuses() { + let fx = e2e_fixture().await; + let sources = PatchSources::blobs_only(&fx.blobs); + let outcome = vendor_pypi( + "pkg:pypi/six@1.16.0", + &fx.site_packages, + &fx.root, + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + false, + // No server: offline must short-circuit before any request. + Some(&pypi_service_cfg("http://127.0.0.1:1", VendorSource::Service, true)), + ) + .await; + match outcome { + VendorOutcome::Refused { code, .. } => { + assert_eq!(code, "vendor_service_offline_conflict") + } + other => panic!("expected Refused, got {other:?}"), + } + } } diff --git a/crates/socket-patch-core/src/patch/vendor/registry_fetch.rs b/crates/socket-patch-core/src/patch/vendor/registry_fetch.rs index 618b4a0..d03908f 100644 --- a/crates/socket-patch-core/src/patch/vendor/registry_fetch.rs +++ b/crates/socket-patch-core/src/patch/vendor/registry_fetch.rs @@ -133,7 +133,10 @@ pub async fn fetch_and_stage( /// Traversal-guarded zip extraction. 
`strip_first` mirrors the tar /// behavior (composer dist zips carry a variable top dir; wheels carry /// content at the root). -fn extract_zip(bytes: &[u8], dest: &Path, strip_first: bool) -> Result<(), String> { +/// +/// `pub(crate)` so the composer service-download path can extract a downloaded +/// dist zip into the vendor copy dir (`strip_first` = drop the top-level dir). +pub(crate) fn extract_zip(bytes: &[u8], dest: &Path, strip_first: bool) -> Result<(), String> { let mut archive = zip::ZipArchive::new(std::io::Cursor::new(bytes)) .map_err(|e| format!("unreadable zip: {e}"))?; if archive.len() > MAX_ENTRIES { @@ -246,43 +249,10 @@ async fn fetch_gem( let bytes = download(client, &url).await.map_err(FetchError::Failed)?; verify_integrity(&bytes, &entry.integrity)?; - // Locate data.tar.gz inside the (uncompressed) outer tar. - let mut archive = tar::Archive::new(bytes.as_slice()); - let mut data: Option> = None; - for e in archive - .entries() - .map_err(|e| FetchError::Failed(format!("unreadable .gem: {e}")))? 
- { - use std::io::Read as _; - let mut e = e.map_err(|err| FetchError::Failed(format!("unreadable .gem entry: {err}")))?; - let is_data = e - .path() - .ok() - .is_some_and(|p| p.as_os_str() == "data.tar.gz"); - if !is_data { - continue; - } - if e.header().size().unwrap_or(u64::MAX) > MAX_DOWNLOAD_BYTES { - return Err(FetchError::Failed( - "data.tar.gz exceeds the size cap".into(), - )); - } - let mut buf = Vec::new(); - e.read_to_end(&mut buf) - .map_err(|err| FetchError::Failed(format!("cannot read data.tar.gz: {err}")))?; - data = Some(buf); - break; - } - let Some(data) = data else { - return Err(FetchError::Failed(format!( - "fetched .gem for {}@{} carries no data.tar.gz", - entry.name, entry.version - ))); - }; let tmp = tempfile::tempdir() .map_err(|e| FetchError::Failed(format!("cannot create fetch tempdir: {e}")))?; let dir = tmp.path().join("gem"); - extract_tgz_no_strip(&data, &dir).map_err(FetchError::Failed)?; + extract_gem_data(&bytes, &dir).map_err(FetchError::Failed)?; Ok(FetchedPackage { dir, url, @@ -468,12 +438,31 @@ fn go_h1_of_zip(bytes: &[u8]) -> Result { )) } +/// Verify a golang module zip's `h1:` dirhash against an expected value. +/// +/// The vendoring service reports `dirhashH1` for golang artifacts (what +/// `go mod verify` checks); the service-download path uses this to confirm the +/// downloaded zip's CONTENTS — not just its bytes — match. +pub(crate) fn verify_go_h1(bytes: &[u8], expected_h1: &str) -> Result<(), String> { + let actual = go_h1_of_zip(bytes)?; + if actual == expected_h1 { + Ok(()) + } else { + Err(format!( + "go module dirhash mismatch: service reports {expected_h1}, the downloaded zip \ + hashes to {actual}" + )) + } +} + /// Traversal-guarded zip extraction with an EXPLICIT required prefix /// (`@/` — go module paths contain slashes, so a /// first-component strip would be wrong). Same guard family as /// [`extract_tgz`]; an entry outside the prefix fails the whole artifact. 
#[cfg(feature = "golang")] -fn extract_zip_with_prefix(bytes: &[u8], dest: &Path, prefix: &str) -> Result<(), String> { +/// `pub(crate)` so the golang service-download path can extract a downloaded +/// module zip (entries prefixed `{module}@{version}/`) into the vendor copy dir. +pub(crate) fn extract_zip_with_prefix(bytes: &[u8], dest: &Path, prefix: &str) -> Result<(), String> { let mut archive = zip::ZipArchive::new(std::io::Cursor::new(bytes)) .map_err(|e| format!("unreadable module zip: {e}"))?; for i in 0..archive.len() { @@ -837,17 +826,56 @@ fn strip_first_component(path: &Path) -> Option { /// bytes-only extraction would silently strip bin scripts' exec bits). /// Fails CLOSED on any traversal-shaped entry — a malicious tarball must /// not half-extract. -fn extract_tgz(bytes: &[u8], dest: &Path) -> Result<(), String> { +/// +/// `pub(crate)` so the cargo service-download path can extract a downloaded +/// `.crate` (tar.gz, single top-level `{name}-{version}/` prefix) into the +/// vendor copy dir — the same content the local `fresh_copy` produces. +pub(crate) fn extract_tgz(bytes: &[u8], dest: &Path) -> Result<(), String> { extract_tar_gz(bytes, dest, /*strip_first=*/ true) } /// Like [`extract_tgz`] but keeps entry paths verbatim (gem `data.tar.gz` /// archives carry package content at the root, no prefix dir). -#[allow(dead_code)] // used by the gem fetcher (feature-independent helper) fn extract_tgz_no_strip(bytes: &[u8], dest: &Path) -> Result<(), String> { extract_tar_gz(bytes, dest, /*strip_first=*/ false) } +/// Extract a `.gem`'s package content into `dest`. A `.gem` is a plain +/// (uncompressed) outer tar holding `data.tar.gz` (the lib files, at the ROOT +/// — no prefix dir), `metadata.gz`, and `checksums.yaml.gz`; only +/// `data.tar.gz` carries content a path source loads, so it is the only member +/// extracted (verbatim paths, no strip). Fails closed when the member is +/// missing or exceeds the size cap. 
+/// +/// `pub(crate)` so the gem service-download path can extract a downloaded, +/// integrity-verified `.gem` into the vendor copy dir — the same content the +/// local `fresh_copy(installed_dir)` produces. +pub(crate) fn extract_gem_data(gem_bytes: &[u8], dest: &Path) -> Result<(), String> { + use std::io::Read as _; + let mut archive = tar::Archive::new(gem_bytes); + for e in archive + .entries() + .map_err(|e| format!("unreadable .gem: {e}"))? + { + let mut e = e.map_err(|err| format!("unreadable .gem entry: {err}"))?; + let is_data = e + .path() + .ok() + .is_some_and(|p| p.as_os_str() == "data.tar.gz"); + if !is_data { + continue; + } + if e.header().size().unwrap_or(u64::MAX) > MAX_DOWNLOAD_BYTES { + return Err("data.tar.gz exceeds the size cap".into()); + } + let mut buf = Vec::new(); + e.read_to_end(&mut buf) + .map_err(|err| format!("cannot read data.tar.gz: {err}"))?; + return extract_tgz_no_strip(&buf, dest); + } + Err("the .gem carries no data.tar.gz".to_string()) +} + fn extract_tar_gz(bytes: &[u8], dest: &Path, strip_first: bool) -> Result<(), String> { use std::io::Read as _; let gz = flate2::read::GzDecoder::new(bytes).take(MAX_TOTAL_DECOMPRESSED_BYTES); diff --git a/crates/socket-patch-core/src/patch/vendor/service_fetch.rs b/crates/socket-patch-core/src/patch/vendor/service_fetch.rs new file mode 100644 index 0000000..702366f --- /dev/null +++ b/crates/socket-patch-core/src/patch/vendor/service_fetch.rs @@ -0,0 +1,261 @@ +//! Shared download-and-verify for the patch.socket.dev vendoring service. +//! +//! Every ecosystem's service path funnels through [`fetch_verified_archive`]: +//! it calls the two-step package-reference + download flow on the API client, +//! then integrity-verifies the bytes BEFORE they are ever written/extracted. +//! Verification is fail-closed — a byte/hash mismatch is always a hard error +//! (`IntegrityMismatch`), never a silent fallback to a wrong artifact. The +//! 
per-ecosystem backends own the placement (Tier A: write the archive; Tier B: +//! extract it into the vendor directory) and the build-vs-service policy. + +use crate::api::client::{SecondaryArtifact, VendorServiceOutcome}; +use crate::patch::vendor::lock_inventory::LockIntegrity; +use crate::patch::vendor::registry_fetch::{artifact_matches_integrity, verify_go_h1}; +use crate::patch::vendor::VendorServiceConfig; + +/// A service archive whose bytes have passed integrity verification. +/// +/// Deliberately minimal: every consumer recomputes the hashes it needs from +/// `bytes` (so a service-downloaded artifact describes itself byte-identically +/// to a local build), so the service-reported sha1/md5/size are not re-carried. +#[derive(Debug, Clone)] +pub(crate) struct VerifiedArchive { + /// The verified archive bytes (npm `.tgz`, pypi `.whl`/sdist, cargo + /// `.crate`, golang/composer `.zip`, gem `.gem`, …). + pub bytes: Vec, + /// Normalized sha512 SRI (`sha512-`) of the bytes — what npm/pypi/etc. + /// lockfiles that key on sha512 embed verbatim. + pub integrity_sri: String, + /// The (possibly host-rewritten) URL the bytes came from — for logging. + pub source_url: String, + /// The OTHER served artifacts (e.g. gem's path-source stub gemspec), still + /// unverified — a backend that needs one calls [`fetch_verified_secondary`] + /// to download + integrity-verify it on demand. + pub secondary: Vec, +} + +/// Result of attempting a service download for one patch UUID. +/// +/// The backends map this onto the `auto` / `service` policy: `Ready` → use it; +/// `Pending` / `Unavailable` / `Failed` → fall back to a local build under +/// `auto` (or hard-fail under `service`); `IntegrityMismatch` → ALWAYS a hard +/// error regardless of mode. +#[derive(Debug)] +pub(crate) enum ServiceArtifact { + Ready(VerifiedArchive), + /// Archive still building (retryable). 
+ Pending, + /// Terminal miss for this input (not built / withdrawn / not found / no + /// usable artifact / service not configured). `String` is a log reason. + Unavailable(String), + /// Request / transport / auth failure. `String` is a log reason. + Failed(String), + /// Bytes downloaded but failed integrity verification — never fall back. + IntegrityMismatch(String), +} + +/// Download and integrity-verify the prebuilt archive for `uuid`. +/// +/// `verify_name` is only consulted for the (npm) yarn-berry checksum kind, +/// which v1 never verifies here — pass the package's bare name for forward +/// compatibility. Verification always checks the sha512 floor and, when the +/// service supplied a golang `h1:` dirhash, that too (it covers the zip's +/// contents, which `go mod verify` relies on). +pub(crate) async fn fetch_verified_archive( + cfg: &VendorServiceConfig, + uuid: &str, + verify_name: &str, +) -> ServiceArtifact { + let Some(client) = cfg.client.as_ref() else { + return ServiceArtifact::Unavailable("vendor service not configured".to_string()); + }; + + let outcome = client + .fetch_vendor_package( + uuid, + cfg.use_public_proxy, + cfg.vendor_url.as_deref(), + cfg.patch_server_url.as_deref(), + ) + .await; + + let pkg = match outcome { + VendorServiceOutcome::Ready(pkg) => pkg, + VendorServiceOutcome::Pending => return ServiceArtifact::Pending, + VendorServiceOutcome::Unavailable(reason) => return ServiceArtifact::Unavailable(reason), + VendorServiceOutcome::Failed(err) => return ServiceArtifact::Failed(err.to_string()), + }; + + // sha512 floor — every ecosystem's tarball carries it. + if let Err(e) = artifact_matches_integrity( + &pkg.tarball, + verify_name, + &LockIntegrity::Sri(pkg.integrity_sri.clone()), + ) { + return ServiceArtifact::IntegrityMismatch(e); + } + // golang module-zip dirhash, when supplied (verifies CONTENTS, not just + // bytes). Ecosystem-agnostic: only runs when the service reported one. 
+ if let Some(h1) = pkg.dirhash_h1.as_deref() { + if let Err(e) = verify_go_h1(&pkg.tarball, h1) { + return ServiceArtifact::IntegrityMismatch(e); + } + } + + ServiceArtifact::Ready(VerifiedArchive { + bytes: pkg.tarball, + integrity_sri: pkg.integrity_sri, + source_url: pkg.source_url, + secondary: pkg.secondary_artifacts, + }) +} + +/// Outcome of fetching + verifying a named secondary artifact. +pub(crate) enum SecondaryArtifactResult { + /// Bytes downloaded and sha512-verified. + Ready(Vec), + /// No artifact of this kind was served (e.g. a native-extension gem emits + /// no stub, or an old row predates the rebuild) — a terminal miss. + Absent, + /// Request / transport / auth failure. `String` is a log reason. + Failed(String), + /// Bytes downloaded but failed integrity verification — never fall back. + IntegrityMismatch(String), +} + +/// Download + integrity-verify the secondary artifact of `kind` (e.g. +/// `gem-stub-gemspec`) referenced by a [`VerifiedArchive`]. +/// +/// `verify_name` is the package's bare name (only consulted by the yarn-berry +/// checksum kind, which never reaches here). The bytes are verified against the +/// artifact's own sha512 SRI, fail-closed like the primary archive. Returns +/// `Absent` when the archive referenced no artifact of this kind — the caller +/// treats that as a miss (fall back under `auto`, refuse under `service`). 
pub(crate) async fn fetch_verified_secondary(
    cfg: &VendorServiceConfig,
    archive: &VerifiedArchive,
    kind: &str,
    verify_name: &str,
) -> SecondaryArtifactResult {
    // The client check runs first, so a missing client is reported as
    // `Failed` even when the archive lists no artifact of this kind.
    let client = match cfg.client.as_ref() {
        Some(client) => client,
        None => {
            return SecondaryArtifactResult::Failed("vendor service not configured".to_string())
        }
    };

    // First entry whose `kind` matches; none at all is a terminal miss.
    let wanted = match archive.secondary.iter().find(|candidate| candidate.kind == kind) {
        Some(found) => found,
        None => return SecondaryArtifactResult::Absent,
    };

    let payload = match client.download_artifact(&wanted.url).await {
        Ok(payload) => payload,
        Err(err) => return SecondaryArtifactResult::Failed(err.to_string()),
    };

    // Verify against the artifact's own SRI before handing the bytes out.
    let expected = LockIntegrity::Sri(wanted.integrity_sri.clone());
    match artifact_matches_integrity(&payload, verify_name, &expected) {
        Ok(_) => SecondaryArtifactResult::Ready(payload),
        Err(reason) => SecondaryArtifactResult::IntegrityMismatch(reason),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::client::{ApiClient, ApiClientOptions};
    use crate::patch::vendor::npm_pack::PackedTarball;
    use crate::patch::vendor::VendorSource;
    use serde_json::json;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    const UUID: &str = "22222222-2222-2222-2222-222222222222";
    const SERVE_PATH: &str = "/patch/npm/x/1.0.0/tok/uuid/x-1.0.0.tgz";

    /// Build a `Service`-mode config whose API client points at `server`.
    fn cfg_for(server: &MockServer) -> VendorServiceConfig {
        VendorServiceConfig {
            source: VendorSource::Service,
            client: Some(ApiClient::new(ApiClientOptions {
                api_url: server.uri(),
                api_token: Some("sktsec_placeholder_value_for_tests_api".into()),
                use_public_proxy: false,
                org_slug: Some("acme".into()),
            })),
            use_public_proxy: false,
            vendor_url: None,
            patch_server_url: None,
            offline: false,
        }
    }

    /// Mount a `granted` package-reference response advertising `sha512`, plus
    /// the GET route that serves `body` at [`SERVE_PATH`].
    async fn mount_granted(server: &MockServer, sha512: &str, body: &[u8]) {
        let serve_url = format!("{}{SERVE_PATH}", server.uri());
        Mock::given(method("POST"))
            .and(path("/v0/orgs/acme/patches/package"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "results": { UUID: {
                    "status": "granted",
                    "url": serve_url,
                    "artifacts": [{ "kind": "tarball", "url": serve_url,
                                    "integrity": { "sha512": sha512 } }]
                }}
            })))
            .mount(server)
            .await;
        Mock::given(method("GET"))
            .and(path(SERVE_PATH))
            .respond_with(ResponseTemplate::new(200).set_body_bytes(body.to_vec()))
            .mount(server)
            .await;
    }

    /// The verify floor accepts bytes whose sha512 matches the service SRI.
    #[tokio::test]
    async fn ready_when_sha512_matches() {
        let server = MockServer::start().await;
        let body = b"verified archive bytes";
        let sri = PackedTarball::from_bytes(body).integrity;
        mount_granted(&server, &sri, body).await;

        match fetch_verified_archive(&cfg_for(&server), UUID, "x").await {
            ServiceArtifact::Ready(v) => {
                assert_eq!(v.bytes, body);
                assert_eq!(v.integrity_sri, sri);
                assert!(v.source_url.ends_with(SERVE_PATH));
            }
            other => panic!("expected Ready, got {other:?}"),
        }
    }

    /// Fail-closed: bytes whose sha512 disagrees with the service SRI are an
    /// IntegrityMismatch (never silently used / fallen back from here).
    #[tokio::test]
    async fn integrity_mismatch_when_sha512_wrong() {
        let server = MockServer::start().await;
        let body = b"the real bytes";
        let wrong = PackedTarball::from_bytes(b"completely different bytes").integrity;
        mount_granted(&server, &wrong, body).await;

        assert!(matches!(
            fetch_verified_archive(&cfg_for(&server), UUID, "x").await,
            ServiceArtifact::IntegrityMismatch(_)
        ));
    }

    /// A config without a client is a quiet Unavailable, not a panic.
+ #[tokio::test] + async fn unavailable_when_client_absent() { + let cfg = VendorServiceConfig { + source: VendorSource::Auto, + client: None, + use_public_proxy: false, + vendor_url: None, + patch_server_url: None, + offline: false, + }; + assert!(matches!( + fetch_verified_archive(&cfg, UUID, "x").await, + ServiceArtifact::Unavailable(_) + )); + } +} diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs index 2dd101b..451719c 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs @@ -75,6 +75,7 @@ pub async fn vendor_yarn_berry( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&super::VendorServiceConfig>, ) -> VendorOutcome { let mut warnings: Vec = Vec::new(); @@ -268,6 +269,7 @@ pub async fn vendor_yarn_berry( dry_run, force, &mut warnings, + service, ) .await { @@ -1132,6 +1134,7 @@ __metadata: "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await } diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs index 6278bb8..f253103 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs @@ -60,6 +60,7 @@ pub async fn vendor_yarn_classic( vendored_at: &str, dry_run: bool, force: bool, + service: Option<&super::VendorServiceConfig>, ) -> VendorOutcome { let mut warnings: Vec = Vec::new(); @@ -136,6 +137,7 @@ pub async fn vendor_yarn_classic( dry_run, force, &mut warnings, + service, ) .await { @@ -951,6 +953,7 @@ left-pad@^1.3.0, left-pad@~1.3.0: "2026-06-09T00:00:00Z", dry_run, false, + None, ) .await }