Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,32 @@ decode/encode):
DECART_API_KEY=... ./realtime_video input.mp4 output.mp4 "put them all in space" lucy-2.1
```

### Connection pre-warming

Billing is tied to active generation — the seconds during which frames flow to
the model. You can establish the authenticated session and WebRTC media path
ahead of time and hold it idle at no cost by connecting with `startMuted = true`,
then `unmute()` when the user is ready. This removes startup latency from the
moment they hit Start. (An idle WebRTC track still emits keepalive frames, so
muting — not merely withholding `captureFrame()` — is what keeps a warmed session
unbilled.)

```cpp
decart::ConnectOptions options;
options.model = model;
options.initialState.prompt = decart::Prompt{"A watercolor painting", /*enhance=*/true};
options.startMuted = true; // warm, but transmit nothing

auto session = client.realtime().connect(source, options); // authenticated, idle, unbilled
// ... later, when the user clicks Start:
session->unmute(); // generation (and billing) begins
// source->captureFrame(myFrame); // push frames from here on
// If the user cancels first, session->disconnect() — no charge.
```

See [`examples/realtime_warmup.cpp`](examples/realtime_warmup.cpp) for a complete,
runnable example.

### Authentication (client tokens)

Create a short-lived token server-side to hand to an untrusted client:
Expand Down
5 changes: 5 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ if(DECART_BUILD_REALTIME)
target_link_libraries(realtime_synthetic PRIVATE decart::decart)
decart_copy_livekit_runtime(realtime_synthetic)

# Connection pre-warming example: connects with the input track muted (no
# billing), then unmutes to go live. Set up like the other realtime examples:
# linked against decart::decart, then passed to decart_copy_livekit_runtime
# (presumably stages the LiveKit runtime libraries for the binary — confirm in
# the helper's definition).
add_executable(realtime_warmup realtime_warmup.cpp)
target_link_libraries(realtime_warmup PRIVATE decart::decart)
decart_copy_livekit_runtime(realtime_warmup)

# Streams a video file through a model and saves the result (requires ffmpeg).
add_executable(realtime_video realtime_video.cpp)
target_link_libraries(realtime_video PRIVATE decart::decart)
Expand Down
68 changes: 68 additions & 0 deletions examples/realtime_warmup.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright 2026 Decart. SPDX-License-Identifier: MIT
//
// Connection pre-warming: establish an authenticated, idle session up front so
// going live has no startup latency. Billing tracks active generation, so a
// muted warmed connection costs nothing; `unmute()` is what starts generation
// (and billing) — even before the first captured frame, because an idle WebRTC
// track still emits keepalive frames. Muting — not merely withholding frames —
// is therefore what keeps a warmed session free.
//
// DECART_API_KEY=sk-... ./realtime_warmup [model]
//
#include <decart/decart.h>
#include <livekit/livekit.h>

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <thread>

int main(int argc, char** argv) {
const char* apiKey = std::getenv("DECART_API_KEY");
if (apiKey == nullptr) {
std::cerr << "Set DECART_API_KEY to run this example.\n";
return 1;
}
const std::string modelName = argc > 1 ? argv[1] : "lucy-restyle-2";

try {
decart::Client client; // reads DECART_API_KEY
const decart::ModelDefinition model = decart::models::realtime(modelName);
auto source = std::make_shared<livekit::VideoSource>(model.width, model.height);

decart::ConnectOptions options;
options.model = model;
options.initialState.prompt = decart::Prompt{"A watercolor painting", true};
options.startMuted = true; // warm the connection without transmitting (no billing)
options.onConnectionState = [](decart::ConnectionState state) {
// Stays "connected" while warmed; flips to "generating" once frames flow.
std::cout << "[state] " << decart::toString(state) << "\n";
};

// 1. Warmup: connects, authenticates, applies the prompt — transmits nothing.
std::cout << "Warming up...\n";
auto session = client.realtime().connect(source, options);
std::cout << "Warmed and idle (muted) — no billing. Holding...\n";
std::this_thread::sleep_for(std::chrono::seconds(3));
// (To cancel before going live: session->disconnect() — no charge.)

// 2. Go live: unmute, then push ~5s of frames. Billing starts here.
std::cout << "Go live!\n";
session->unmute();
const auto interval = std::chrono::milliseconds(1000 / std::max(1, model.fps));
for (int i = 0; i < model.fps * 5; ++i) {
auto frame = livekit::VideoFrame::create(model.width, model.height, livekit::VideoBufferType::RGBA);
std::fill(frame.data(), frame.data() + frame.dataSize(), std::uint8_t{128}); // solid gray
source->captureFrame(frame);
std::this_thread::sleep_for(interval);
}

session->disconnect();
return 0;
} catch (const decart::Exception& e) {
std::cerr << "Decart error [" << decart::toString(e.code()) << "]: " << e.what() << "\n";
return 1;
}
}
9 changes: 9 additions & 0 deletions include/decart/realtime/realtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ struct ConnectOptions {
/// Optional output resolution hint ("720p" or "1080p").
std::optional<std::string> resolution;

/// Publish the input track muted, so no frames reach the model until you call
/// `RealtimeSession::unmute()`. Use this to pre-warm a connection: the session
/// is fully authenticated and the WebRTC media path is established, but nothing
/// is transmitted — and no generation is billed — while muted. An idle video
/// track still emits keepalive frames over WebRTC, so muting (rather than just
/// withholding `captureFrame`) is what keeps a warmed session free. Pair with
/// `initialState` to apply the prompt/image during the idle warmup phase.
///
/// Defaults to `false`: the track is published unmuted.
bool startMuted = false;

/// Bound on the signaling handshake (socket open + room join). The LiveKit
/// media connect manages its own timeout. Default 60s.
std::chrono::milliseconds connectTimeout{60000};
Expand Down
16 changes: 16 additions & 0 deletions include/decart/realtime/session.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,22 @@ class RealtimeSession {
/// Blocks until acknowledged.
void setImage(const std::optional<ImageInput>& image, const UpdateOptions& options = {});

/// Stop transmitting video to the model without tearing down the connection.
/// The published track stays live and resumes with `unmute()` — no
/// renegotiation. Use this for connection pre-warming: connect (optionally with
/// `ConnectOptions::startMuted`), hold the authenticated session idle at no
/// generation cost, then `unmute()` and start capturing frames when the user is
/// ready. Local operation; a no-op when the session has no published input
/// track.
///
/// @throws decart::Exception (`ErrorCode::NotConnected`) if the session has
///         disconnected.
void mute();

/// Resume transmitting to the model after `mute()` (or after connecting with
/// `ConnectOptions::startMuted`). Once unmuted the track sends again —
/// including WebRTC keepalive frames — so generation, and billing, can begin
/// even before your first `captureFrame`. Unmute only when you are ready to go
/// live. A no-op when the session has no published input track.
///
/// @throws decart::Exception (`ErrorCode::NotConnected`) if the session has
///         disconnected.
void unmute();

/// True while the session is connected (state `Connected` or `Generating`).
bool isConnected() const noexcept;

Expand Down
70 changes: 65 additions & 5 deletions src/realtime/session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,29 @@ class MediaSession : public livekit::RoomDelegate {
// Generation finished; fall back to Connected (a new run re-enters Generating).
void markGenerationEnded() { transitionIf(ConnectionState::Generating, ConnectionState::Connected); }

void connectRoom(const RoomInfo& info, const std::shared_ptr<livekit::VideoSource>& source,
int publishFps) {
// Stop/resume transmitting the published input track without renegotiation.
// Used for connection pre-warming (see ConnectOptions::startMuted). No-op when
// there is no published track (e.g. a subscribe-only session).
void mute() {
std::shared_ptr<livekit::LocalVideoTrack> track;
{
std::lock_guard<std::mutex> lk(trackMutex_);
track = publishedTrack_;
}
if (track) track->mute();
}

void unmute() {
std::shared_ptr<livekit::LocalVideoTrack> track;
{
std::lock_guard<std::mutex> lk(trackMutex_);
track = publishedTrack_;
}
if (track) track->unmute();
}

void connectRoom(const RoomInfo& info, const std::shared_ptr<livekit::VideoSource>& source, int publishFps,
bool startMuted) {
room_ = std::make_unique<livekit::Room>();
room_->setDelegate(this);

Expand All @@ -157,6 +178,14 @@ class MediaSession : public livekit::RoomDelegate {
if (!local) throw Exception(Error{ErrorCode::MediaError, "Local participant unavailable"});

auto track = livekit::LocalVideoTrack::createLocalVideoTrack("decart-input", source);

// Pre-warming: mute BEFORE publishing so no media ever reaches the
// inference server until unmute(). An idle WebRTC track emits keepalive
// frames (and the source may already be producing), so a track that goes
// live unmuted — even briefly — can start generation and billing. Muting
// after publish would leave that window open.
if (startMuted) track->mute();

livekit::TrackPublishOptions publish;
publish.source = livekit::TrackSource::SOURCE_CAMERA;
publish.simulcast = false;
Expand All @@ -165,7 +194,17 @@ class MediaSession : public livekit::RoomDelegate {
encoding.max_framerate = static_cast<double>(publishFps);
publish.video_encoding = encoding;
local->publishTrack(track, publish);
publishedTrack_ = std::move(track);

// Re-assert muted state as a guarantee: a muted track stays published and
// unmutes later with no renegotiation. This is a no-op if the pre-publish
// mute already took effect, and a safety net if it did not register on the
// not-yet-published track.
if (startMuted) track->mute();

{
std::lock_guard<std::mutex> lk(trackMutex_);
publishedTrack_ = std::move(track);
}
}
}

Expand All @@ -184,7 +223,10 @@ class MediaSession : public livekit::RoomDelegate {
room_->disconnect();
room_.reset();
}
publishedTrack_.reset();
{
std::lock_guard<std::mutex> lk(trackMutex_);
publishedTrack_.reset();
}
setState(ConnectionState::Disconnected);
}

Expand Down Expand Up @@ -243,6 +285,7 @@ class MediaSession : public livekit::RoomDelegate {

private:
std::unique_ptr<livekit::Room> room_;
mutable std::mutex trackMutex_;
std::shared_ptr<livekit::LocalVideoTrack> publishedTrack_;

mutable std::mutex stateMutex_;
Expand Down Expand Up @@ -349,6 +392,23 @@ void RealtimeSession::setImage(const std::optional<ImageInput>& image, const Upd
impl_->signaling->setImageData(base64, prompt, enhance, options.timeout);
}

void RealtimeSession::mute() {
  // Muting is a local track operation, so it is permitted in every live state
  // (including Reconnecting); a transient media blip must not cause the call
  // to be rejected. Only a fully disconnected session is refused.
  auto& media = impl_->media;
  const bool disconnected = (media.state() == ConnectionState::Disconnected);
  if (disconnected) {
    throw Exception(Error{ErrorCode::NotConnected, "Realtime session is not connected"});
  }
  media.mute();
}

void RealtimeSession::unmute() {
  // Refused only once the session is fully disconnected; any other state
  // forwards the request to the media layer.
  auto& media = impl_->media;
  const bool disconnected = (media.state() == ConnectionState::Disconnected);
  if (disconnected) {
    throw Exception(Error{ErrorCode::NotConnected, "Realtime session is not connected"});
  }
  media.unmute();
}

bool RealtimeSession::isConnected() const noexcept { return impl_->media.isConnected(); }
ConnectionState RealtimeSession::connectionState() const noexcept { return impl_->media.state(); }

Expand Down Expand Up @@ -447,7 +507,7 @@ std::unique_ptr<RealtimeSession> connectRealtime(std::shared_ptr<ClientConfig> c
impl->signaling->sendPrompt(initial.prompt->text, initial.prompt->enhance, kPromptTimeout);
}

impl->media.connectRoom(info, source, options.model.fps);
impl->media.connectRoom(info, source, options.model.fps, options.startMuted);

// With no explicit initial state, a passthrough set_image kicks off generation.
if (!hasInitialState) {
Expand Down
Loading