From b75489ac0797107b22e9e77abf286d08b74e7549 Mon Sep 17 00:00:00 2001 From: Adir Amsalem Date: Thu, 18 Jun 2026 18:16:29 +0300 Subject: [PATCH] feat(realtime): add connection pre-warming via startMuted + mute/unmute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Establish an authenticated, idle session up front and hold it unbilled, then go live with no startup latency. Adds ConnectOptions::startMuted (publishes the input track muted) and RealtimeSession::mute()/unmute(). An idle WebRTC track emits keepalive frames, so we mute the LiveKit local track before publishing — muting, not withholding captureFrame, is what keeps the warmed session free. Includes the realtime_warmup example, README docs, and thread-safe access to the published track. Verified live: a session alive 18.4s billed only 5s (the post-unmute generation window); the muted warmup window billed zero. --- README.md | 26 +++++++++++ examples/CMakeLists.txt | 5 +++ examples/realtime_warmup.cpp | 68 +++++++++++++++++++++++++++++ include/decart/realtime/realtime.h | 9 ++++ include/decart/realtime/session.h | 16 +++++++ src/realtime/session.cpp | 70 +++++++++++++++++++++++++++--- 6 files changed, 189 insertions(+), 5 deletions(-) create mode 100644 examples/realtime_warmup.cpp diff --git a/README.md b/README.md index 7edc1ec..6f1ed19 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,32 @@ decode/encode): DECART_API_KEY=... ./realtime_video input.mp4 output.mp4 "put them all in space" lucy-2.1 ``` +### Connection pre-warming + +Billing is tied to active generation — the seconds during which frames flow to +the model. You can establish the authenticated session and WebRTC media path +ahead of time and hold it idle at no cost by connecting with `startMuted = true`, +then `unmute()` when the user is ready. This removes startup latency from the +moment they hit Start. 
(An idle WebRTC track still emits keepalive frames, so +muting — not merely withholding `captureFrame()` — is what keeps a warmed session +unbilled.) + +```cpp +decart::ConnectOptions options; +options.model = model; +options.initialState.prompt = decart::Prompt{"A watercolor painting", /*enhance=*/true}; +options.startMuted = true; // warm, but transmit nothing + +auto session = client.realtime().connect(source, options); // authenticated, idle, unbilled +// ... later, when the user clicks Start: +session->unmute(); // generation (and billing) begins +// source->captureFrame(myFrame); // push frames from here on +// If the user cancels first, session->disconnect() — no charge. +``` + +See [`examples/realtime_warmup.cpp`](examples/realtime_warmup.cpp) for a complete, +runnable example. + ### Authentication (client tokens) Create a short-lived token server-side to hand to an untrusted client: diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 83ecff1..856f25d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -15,6 +15,11 @@ if(DECART_BUILD_REALTIME) target_link_libraries(realtime_synthetic PRIVATE decart::decart) decart_copy_livekit_runtime(realtime_synthetic) + # Connection pre-warming: connect muted (no billing), then unmute to go live. + add_executable(realtime_warmup realtime_warmup.cpp) + target_link_libraries(realtime_warmup PRIVATE decart::decart) + decart_copy_livekit_runtime(realtime_warmup) + # Streams a video file through a model and saves the result (requires ffmpeg). add_executable(realtime_video realtime_video.cpp) target_link_libraries(realtime_video PRIVATE decart::decart) diff --git a/examples/realtime_warmup.cpp b/examples/realtime_warmup.cpp new file mode 100644 index 0000000..6791fa1 --- /dev/null +++ b/examples/realtime_warmup.cpp @@ -0,0 +1,68 @@ +// Copyright 2026 Decart. 
SPDX-License-Identifier: MIT +// +// Connection pre-warming: establish an authenticated, idle session up front so +// going live has no startup latency. Billing tracks active generation, so a +// muted warmed connection costs nothing; `unmute()` — even before any frame is +// captured — starts generation (and billing): an idle WebRTC track still emits +// keepalive frames, so muting — not merely withholding frames — keeps it free. +// +// DECART_API_KEY=sk-... ./realtime_warmup [model] +// +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + const char* apiKey = std::getenv("DECART_API_KEY"); + if (apiKey == nullptr) { + std::cerr << "Set DECART_API_KEY to run this example.\n"; + return 1; + } + const std::string modelName = argc > 1 ? argv[1] : "lucy-restyle-2"; + + try { + decart::Client client; // reads DECART_API_KEY + const decart::ModelDefinition model = decart::models::realtime(modelName); + auto source = std::make_shared(model.width, model.height); + + decart::ConnectOptions options; + options.model = model; + options.initialState.prompt = decart::Prompt{"A watercolor painting", true}; + options.startMuted = true; // warm the connection without transmitting (no billing) + options.onConnectionState = [](decart::ConnectionState state) { + // Stays "connected" while warmed; flips to "generating" once frames flow. + std::cout << "[state] " << decart::toString(state) << "\n"; + }; + + // 1. Warmup: connects, authenticates, applies the prompt — transmits nothing. + std::cout << "Warming up...\n"; + auto session = client.realtime().connect(source, options); + std::cout << "Warmed and idle (muted) — no billing. Holding...\n"; + std::this_thread::sleep_for(std::chrono::seconds(3)); + // (To cancel before going live: session->disconnect() — no charge.) + + // 2. Go live: unmute, then push ~5s of frames. Billing starts here. 
+ std::cout << "Go live!\n"; + session->unmute(); + const auto interval = std::chrono::milliseconds(1000 / std::max(1, model.fps)); + for (int i = 0; i < model.fps * 5; ++i) { + auto frame = livekit::VideoFrame::create(model.width, model.height, livekit::VideoBufferType::RGBA); + std::fill(frame.data(), frame.data() + frame.dataSize(), std::uint8_t{128}); // solid gray + source->captureFrame(frame); + std::this_thread::sleep_for(interval); + } + + session->disconnect(); + return 0; + } catch (const decart::Exception& e) { + std::cerr << "Decart error [" << decart::toString(e.code()) << "]: " << e.what() << "\n"; + return 1; + } +} diff --git a/include/decart/realtime/realtime.h b/include/decart/realtime/realtime.h index 04fe8ec..e245c85 100644 --- a/include/decart/realtime/realtime.h +++ b/include/decart/realtime/realtime.h @@ -61,6 +61,15 @@ struct ConnectOptions { /// Optional output resolution hint ("720p" or "1080p"). std::optional resolution; + /// Publish the input track muted, so no frames reach the model until you call + /// `RealtimeSession::unmute()`. Use this to pre-warm a connection: the session + /// is fully authenticated and the WebRTC media path is established, but nothing + /// is transmitted — and no generation is billed — while muted. An idle video + /// track still emits keepalive frames over WebRTC, so muting (rather than just + /// withholding `captureFrame`) is what keeps a warmed session free. Pair with + /// `initialState` to apply the prompt/image during the idle warmup phase. + bool startMuted = false; + /// Bound on the signaling handshake (socket open + room join). The LiveKit /// media connect manages its own timeout. Default 60s. 
std::chrono::milliseconds connectTimeout{60000}; diff --git a/include/decart/realtime/session.h b/include/decart/realtime/session.h index 7a70815..62575ee 100644 --- a/include/decart/realtime/session.h +++ b/include/decart/realtime/session.h @@ -59,6 +59,22 @@ class RealtimeSession { /// Blocks until acknowledged. void setImage(const std::optional& image, const UpdateOptions& options = {}); + /// Stop transmitting video to the model without tearing down the connection. + /// The published track stays live and resumes with `unmute()` — no + /// renegotiation. Use this for connection pre-warming: connect (optionally with + /// `ConnectOptions::startMuted`), hold the authenticated session idle at no + /// generation cost, then `unmute()` and start capturing frames when the user is + /// ready. Local operation; `@throws decart::Exception` if the session has + /// disconnected. + void mute(); + + /// Resume transmitting to the model after `mute()` (or after connecting with + /// `ConnectOptions::startMuted`). Once unmuted the track sends again — + /// including WebRTC keepalive frames — so generation, and billing, can begin + /// even before your first `captureFrame`. Unmute only when you are ready to go + /// live. `@throws decart::Exception` if the session has disconnected. + void unmute(); + /// True while the session is connected (state `Connected` or `Generating`). bool isConnected() const noexcept; diff --git a/src/realtime/session.cpp b/src/realtime/session.cpp index 9a5ee49..a30fb14 100644 --- a/src/realtime/session.cpp +++ b/src/realtime/session.cpp @@ -136,8 +136,29 @@ class MediaSession : public livekit::RoomDelegate { // Generation finished; fall back to Connected (a new run re-enters Generating). 
void markGenerationEnded() { transitionIf(ConnectionState::Generating, ConnectionState::Connected); } - void connectRoom(const RoomInfo& info, const std::shared_ptr& source, - int publishFps) { + // Stop/resume transmitting the published input track without renegotiation. + // Used for connection pre-warming (see ConnectOptions::startMuted). No-op when + // there is no published track (e.g. a subscribe-only session). + void mute() { + std::shared_ptr track; + { + std::lock_guard lk(trackMutex_); + track = publishedTrack_; + } + if (track) track->mute(); + } + + void unmute() { + std::shared_ptr track; + { + std::lock_guard lk(trackMutex_); + track = publishedTrack_; + } + if (track) track->unmute(); + } + + void connectRoom(const RoomInfo& info, const std::shared_ptr& source, int publishFps, + bool startMuted) { room_ = std::make_unique(); room_->setDelegate(this); @@ -157,6 +178,14 @@ class MediaSession : public livekit::RoomDelegate { if (!local) throw Exception(Error{ErrorCode::MediaError, "Local participant unavailable"}); auto track = livekit::LocalVideoTrack::createLocalVideoTrack("decart-input", source); + + // Pre-warming: mute BEFORE publishing so no media ever reaches the + // inference server until unmute(). An idle WebRTC track emits keepalive + // frames (and the source may already be producing), so a track that goes + // live unmuted — even briefly — can start generation and billing. Muting + // after publish would leave that window open. + if (startMuted) track->mute(); + livekit::TrackPublishOptions publish; publish.source = livekit::TrackSource::SOURCE_CAMERA; publish.simulcast = false; @@ -165,7 +194,17 @@ class MediaSession : public livekit::RoomDelegate { encoding.max_framerate = static_cast(publishFps); publish.video_encoding = encoding; local->publishTrack(track, publish); - publishedTrack_ = std::move(track); + + // Re-assert muted state as a guarantee: a muted track stays published and + // unmutes later with no renegotiation. 
This is a no-op if the pre-publish + // mute already took effect, and a safety net if it did not register on the + // not-yet-published track. + if (startMuted) track->mute(); + + { + std::lock_guard lk(trackMutex_); + publishedTrack_ = std::move(track); + } } } @@ -184,7 +223,10 @@ class MediaSession : public livekit::RoomDelegate { room_->disconnect(); room_.reset(); } - publishedTrack_.reset(); + { + std::lock_guard lk(trackMutex_); + publishedTrack_.reset(); + } setState(ConnectionState::Disconnected); } @@ -243,6 +285,7 @@ class MediaSession : public livekit::RoomDelegate { private: std::unique_ptr room_; + mutable std::mutex trackMutex_; std::shared_ptr publishedTrack_; mutable std::mutex stateMutex_; @@ -349,6 +392,23 @@ void RealtimeSession::setImage(const std::optional& image, const Upd impl_->signaling->setImageData(base64, prompt, enhance, options.timeout); } +void RealtimeSession::mute() { + // Local track operation — allowed in any live state (including Reconnecting), + // so a transient media blip doesn't reject a mute/unmute (e.g. the warmup unmute + // when the user hits Start). Only a fully disconnected session is rejected. 
+ if (impl_->media.state() == ConnectionState::Disconnected) { + throw Exception(Error{ErrorCode::NotConnected, "Realtime session is not connected"}); + } + impl_->media.mute(); +} + +void RealtimeSession::unmute() { + if (impl_->media.state() == ConnectionState::Disconnected) { + throw Exception(Error{ErrorCode::NotConnected, "Realtime session is not connected"}); + } + impl_->media.unmute(); +} + bool RealtimeSession::isConnected() const noexcept { return impl_->media.isConnected(); } ConnectionState RealtimeSession::connectionState() const noexcept { return impl_->media.state(); } @@ -447,7 +507,7 @@ std::unique_ptr connectRealtime(std::shared_ptr c impl->signaling->sendPrompt(initial.prompt->text, initial.prompt->enhance, kPromptTimeout); } - impl->media.connectRoom(info, source, options.model.fps); + impl->media.connectRoom(info, source, options.model.fps, options.startMuted); // With no explicit initial state, a passthrough set_image kicks off generation. if (!hasInitialState) {