Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/openshell-sandbox/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ pub async fn run_sandbox(
opa_engine.as_ref(),
retained_proto.as_ref(),
entrypoint_pid.clone(),
process_enabled,
&provider_credentials,
sandbox_id.as_deref(),
sandbox_name_for_agg.as_deref(),
Expand Down
25 changes: 25 additions & 0 deletions crates/openshell-supervisor-network/src/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ impl ProxyHandle {
policy_local_ctx: Option<Arc<PolicyLocalContext>>,
denial_tx: Option<mpsc::UnboundedSender<DenialEvent>>,
activity_tx: Option<ActivitySender>,
engine_ready: tokio::sync::watch::Receiver<bool>,
) -> Result<Self> {
// Use override bind_addr, fall back to policy http_addr, then default
// to loopback:3128. The default allows the proxy to function when no
Expand Down Expand Up @@ -229,6 +230,30 @@ impl ProxyHandle {
}

let join = tokio::spawn(async move {
// Wait for the OPA engine's symlink resolution reload to complete
// before accepting connections. This prevents requests from
// observing a generation transition mid-flight, which would cause
// the generation guard to reject them with a 403.
//
// The TCP listener is already bound, so the OS backlog queues
// incoming SYN packets during this wait. Once we start accepting,
// queued connections drain immediately.
let mut engine_ready = engine_ready;
match tokio::time::timeout(
std::time::Duration::from_secs(15),
engine_ready.wait_for(|v| *v),
)
.await
{
Ok(_) => {}
Err(_) => {
warn!(
"Engine readiness signal not received within 15s; \
proceeding with proxy accept loop"
);
}
}

loop {
match listener.accept().await {
Ok((stream, _addr)) => {
Expand Down
136 changes: 78 additions & 58 deletions crates/openshell-supervisor-network/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ pub async fn run_networking(
opa_engine: Option<&Arc<OpaEngine>>,
retained_proto: Option<&ProtoSandboxPolicy>,
entrypoint_pid: Arc<AtomicU32>,
process_enabled: bool,
provider_credentials: &ProviderCredentialState,
sandbox_id: Option<&str>,
sandbox_name: Option<&str>,
Expand All @@ -97,80 +98,98 @@ pub async fn run_networking(
.or_else(|| sandbox_id.map(str::to_string)),
));

// Readiness signal for the proxy accept loop: the proxy binds the TCP
// listener immediately (so the OS backlog queues early SYN packets) but
// defers `accept()` until symlink resolution completes. This eliminates
// the race where an in-flight request observes a generation transition
// during the OPA engine reload.
let (engine_ready_tx, engine_ready_rx) = tokio::sync::watch::channel(false);

// Spawn a task to resolve policy binary symlinks once the workload's mount
// namespace becomes accessible via /proc/<pid>/root/. The task starts
// before run_process spawns the child, so first wait for the orchestrator
// to publish a non-zero PID, then poll for proc-root readiness.
if let (Some(engine), Some(proto)) = (opa_engine, retained_proto) {
let resolve_engine = engine.clone();
let resolve_proto = proto.clone();
let resolve_pid = entrypoint_pid.clone();
tokio::spawn(async move {
// Phase 1: wait for run_process to publish the entrypoint PID.
// 20 attempts * 250ms = 5s window.
let mut pid = 0;
for attempt in 1..=20 {
pid = resolve_pid.load(Ordering::Acquire);
if pid != 0 {
break;
if process_enabled {
let resolve_engine = engine.clone();
let resolve_proto = proto.clone();
let resolve_pid = entrypoint_pid.clone();
tokio::spawn(async move {
// Phase 1: wait for run_process to publish the entrypoint PID.
// 20 attempts * 250ms = 5s window.
let mut pid = 0;
for attempt in 1..=20 {
pid = resolve_pid.load(Ordering::Acquire);
if pid != 0 {
break;
}
debug!(
attempt,
"Entrypoint PID not yet published, waiting before symlink resolution"
);
tokio::time::sleep(Duration::from_millis(250)).await;
}
debug!(
attempt,
"Entrypoint PID not yet published, waiting before symlink resolution"
);
tokio::time::sleep(Duration::from_millis(250)).await;
}
if pid == 0 {
warn!(
"Entrypoint PID never published; binary symlink resolution skipped. \
if pid == 0 {
warn!(
"Entrypoint PID never published; binary symlink resolution skipped. \
Policy binary paths will be matched literally."
);
return;
}

// Phase 2: wait for /proc/<pid>/root/ to become traversable. The
// child's mount namespace is typically ready within a few hundred
// ms of spawn. 10 attempts * 500ms = 5s window.
let probe_path = format!("/proc/{pid}/root/");
for attempt in 1..=10 {
tokio::time::sleep(Duration::from_millis(500)).await;
if std::fs::metadata(&probe_path).is_ok() {
info!(
pid = pid,
attempt = attempt,
"Container filesystem accessible, resolving policy binary symlinks"
);
match resolve_engine.reload_from_proto_with_pid(&resolve_proto, pid) {
Ok(()) => {
info!(
pid = pid,
"Policy binary symlink resolution complete \
let _ = engine_ready_tx.send(true);
return;
}

// Phase 2: wait for /proc/<pid>/root/ to become traversable. The
// child's mount namespace is typically ready within a few hundred
// ms of spawn. 10 attempts * 500ms = 5s window.
let probe_path = format!("/proc/{pid}/root/");
for attempt in 1..=10 {
tokio::time::sleep(Duration::from_millis(500)).await;
if std::fs::metadata(&probe_path).is_ok() {
info!(
pid = pid,
attempt = attempt,
"Container filesystem accessible, resolving policy binary symlinks"
);
match resolve_engine.reload_from_proto_with_pid(&resolve_proto, pid) {
Ok(()) => {
info!(
pid = pid,
"Policy binary symlink resolution complete \
(check logs above for per-binary results)"
);
}
Err(e) => {
warn!(
"Failed to rebuild OPA engine with symlink resolution \
);
}
Err(e) => {
warn!(
"Failed to rebuild OPA engine with symlink resolution \
(non-fatal, falling back to literal path matching): {e}"
);
);
}
}
let _ = engine_ready_tx.send(true);
return;
}
return;
debug!(
pid = pid,
attempt = attempt,
probe_path = %probe_path,
"Container filesystem not yet accessible, retrying symlink resolution"
);
}
debug!(
pid = pid,
attempt = attempt,
probe_path = %probe_path,
"Container filesystem not yet accessible, retrying symlink resolution"
);
}
warn!(
"Container filesystem /proc/{pid}/root/ not accessible after 10 attempts (5s); \
warn!(
"Container filesystem /proc/{pid}/root/ not accessible after 10 attempts (5s); \
binary symlink resolution skipped. Policy binary paths will be matched literally. \
If binaries are symlinks, use canonical paths in your policy \
(run 'readlink -f <path>' inside the sandbox)"
);
});
);
let _ = engine_ready_tx.send(true);
});
} else {
// No process supervisor — PID will never arrive, skip symlink resolution.
let _ = engine_ready_tx.send(true);
}
} else {
// No symlink resolution needed — unblock the proxy immediately.
let _ = engine_ready_tx.send(true);
}

// Identity cache for SHA256 TOFU when OPA is active. Only consumed by
Expand Down Expand Up @@ -279,6 +298,7 @@ pub async fn run_networking(
Some(policy_local_ctx.clone()),
denial_tx,
activity_tx,
engine_ready_rx,
)
.await?;
Some(proxy_handle)
Expand Down
Loading