Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## Unreleased

### Features

- Add the `data_collection` option, a structured configuration that supersedes `send_default_pii` for controlling what data integrations collect automatically (user identity, cookies, HTTP headers, query params, HTTP bodies, generative AI inputs/outputs, stack frame variables, source context). See the [Data Collection spec](https://develop.sentry.dev/sdk/foundations/client/data-collection/).
- Add `sentry_sdk.DataCollection`, `KeyValueCollectionBehavior`, `HttpHeadersCollection`, and `GenAICollection`.
- When `data_collection` is not set, behavior is derived from `send_default_pii` (now deprecated), so upgrading without configuring `data_collection` changes nothing.
- `frame_context_lines` is now configurable (previously hardcoded to 5); AI integrations' `include_prompts` becomes a per-integration override of `data_collection.gen_ai`.

## 2.63.0

### Bug Fixes 🐛
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ sentry_sdk.init(
# Set traces_sample_rate to 1.0 to capture 100%
# of traces for performance monitoring.
traces_sample_rate=1.0,

# To disable sending user data and HTTP request/response bodies, uncomment
# the line below. For more info visit:
# https://docs.sentry.io/platforms/python/configuration/options/#data_collection
# data_collection=sentry_sdk.DataCollection(user_info=False, http_bodies=[]),
)
```

Expand Down
10 changes: 10 additions & 0 deletions sentry_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

from sentry_sdk.scope import Scope # isort: skip
from sentry_sdk.client import Client # isort: skip
from sentry_sdk.data_collection import ( # isort: skip
DataCollection,
GenAICollection,
HttpHeadersCollection,
KeyValueCollectionBehavior,
)
from sentry_sdk.consts import VERSION
from sentry_sdk.transport import HttpTransport, Transport

Expand All @@ -11,6 +17,10 @@
"Hub",
"Scope",
"Client",
"DataCollection",
"GenAICollection",
"HttpHeadersCollection",
"KeyValueCollectionBehavior",
"Transport",
"HttpTransport",
"VERSION",
Expand Down
93 changes: 90 additions & 3 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
VERSION,
ClientConstructor,
)
from sentry_sdk.data_collection import (
OFF_DATA_COLLECTION,
DataCollection,
_map_from_send_default_pii,
resolve_data_collection,
)
from sentry_sdk.envelope import Envelope, Item, PayloadRef
from sentry_sdk.integrations import _DEFAULT_INTEGRATIONS, setup_integrations
from sentry_sdk.integrations.dedupe import DedupeIntegration
Expand Down Expand Up @@ -345,11 +351,11 @@ def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]":
if rv["enable_tracing"] is True and rv["traces_sample_rate"] is None:
rv["traces_sample_rate"] = 1.0

rv["data_collection"] = resolve_data_collection(rv)

if rv["event_scrubber"] is None:
rv["event_scrubber"] = EventScrubber(
send_default_pii=(
False if rv["send_default_pii"] is None else rv["send_default_pii"]
)
send_default_pii=rv["data_collection"].user_info
)

if rv["socket_options"] and not isinstance(rv["socket_options"], list):
Expand Down Expand Up @@ -425,6 +431,23 @@ def parsed_dsn(self) -> "Optional[Dsn]":
def should_send_default_pii(self) -> bool:
    """Report whether default PII may be sent; this implementation always says no."""
    return False

@property
def data_collection(self) -> "DataCollection":
    """
    Return the data collection configuration for this client.

    This implementation consults no options and always hands back the
    ``OFF_DATA_COLLECTION`` constant imported from
    ``sentry_sdk.data_collection``.
    """
    return OFF_DATA_COLLECTION

def should_collect_user_info(self) -> bool:
    """Report whether user info should be collected; this implementation always says no."""
    return False

def should_collect_gen_ai_inputs(
    self, include_prompts: "Optional[bool]" = None
) -> bool:
    """
    Report whether generative AI input content should be collected.

    This implementation ignores ``include_prompts`` and always returns
    ``False``.
    """
    return False

def should_collect_gen_ai_outputs(
    self, include_prompts: "Optional[bool]" = None
) -> bool:
    """
    Report whether generative AI output content should be collected.

    This implementation ignores ``include_prompts`` and always returns
    ``False``.
    """
    return False

def is_active(self) -> bool:
"""
.. versionadded:: 2.0.0
Expand Down Expand Up @@ -614,6 +637,17 @@ def _record_lost_event(
self.options["error_sampler"] = sample_all
self.options["traces_sampler"] = sample_all
self.options["profiles_sampler"] = sample_all
# data_collection was resolved in _get_options() before this
# spotlight override flipped send_default_pii on. Re-derive it so
# the should_collect_* accessors agree with should_send_default_pii()
# in DSN-less spotlight mode (only when the user did not set
# data_collection explicitly).
if not self.options["data_collection"].explicit:
self.options["data_collection"] = _map_from_send_default_pii(
True,
self.options["include_local_variables"] is not False,
self.options["include_source_context"] is not False,
)

self.session_flusher = SessionFlusher(capture_func=_capture_envelope)

Expand Down Expand Up @@ -724,6 +758,59 @@ def should_send_default_pii(self) -> bool:
"""
return self.options.get("send_default_pii") or False

@property
def data_collection(self) -> "DataCollection":
    """
    The resolved :class:`~sentry_sdk.data_collection.DataCollection`
    configuration of this client.

    Read from ``self.options["data_collection"]``; when that entry is
    missing or ``None``, ``OFF_DATA_COLLECTION`` is returned instead.
    """
    resolved = self.options.get("data_collection")
    if resolved is None:
        return OFF_DATA_COLLECTION
    return resolved

def should_collect_user_info(self) -> bool:
    """
    Tell whether instrumentation should automatically populate the
    ``user.*`` fields (id, email, username, ip_address).

    The answer is the truthiness of ``self.data_collection.user_info``.
    """
    user_info_setting = self.data_collection.user_info
    return bool(user_info_setting)

def should_collect_gen_ai_inputs(
    self, include_prompts: "Optional[bool]" = None
) -> bool:
    """
    Tell whether generative AI input content should be collected.

    ``include_prompts`` carries the integration-level override; when it is
    set it takes precedence over the global ``data_collection.gen_ai.inputs``
    setting. The decision itself is delegated to
    ``_should_collect_gen_ai_content`` with the ``"inputs"`` direction.
    """
    decision = self._should_collect_gen_ai_content("inputs", include_prompts)
    return decision

def should_collect_gen_ai_outputs(
    self, include_prompts: "Optional[bool]" = None
) -> bool:
    """
    Tell whether generative AI output content should be collected.

    ``include_prompts`` carries the integration-level override; when it is
    set it takes precedence over the global ``data_collection.gen_ai.outputs``
    setting. The decision itself is delegated to
    ``_should_collect_gen_ai_content`` with the ``"outputs"`` direction.
    """
    decision = self._should_collect_gen_ai_content("outputs", include_prompts)
    return decision

def _should_collect_gen_ai_content(
    self, direction: str, include_prompts: "Optional[bool]"
) -> bool:
    """
    Decide whether generative AI content for one direction is collected.

    :param direction: Name of the attribute looked up on
        ``data_collection.gen_ai``; the callers in this module pass
        ``"inputs"`` or ``"outputs"``.
    :param include_prompts: Integration-level override. ``None`` means the
        integration did not express a preference.
    :returns: ``True`` when the content should be collected, always as a
        real ``bool``.
    """
    dc = self.data_collection
    if dc.explicit:
        # Integration-level override wins over the global gen_ai setting.
        # Coerce so that a truthy non-bool override (e.g. ``1``) cannot leak
        # out of a method that is declared to return ``bool``.
        if include_prompts is not None:
            return bool(include_prompts)
        return bool(getattr(dc.gen_ai, direction))
    # Legacy (data_collection not set): preserve the historical gate
    # `should_send_default_pii() and integration.include_prompts`.
    # `include_prompts is None` means "no integration-level override", which
    # falls back to the legacy default of True (collect when PII is on).
    return bool(self.should_send_default_pii() and (include_prompts is not False))

@property
def dsn(self) -> "Optional[str]":
"""Returns the configured DSN as string."""
Expand Down
22 changes: 22 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class CompressionAlgo(Enum):
TracesSampler,
TransactionProcessor,
)
from sentry_sdk.data_collection import DataCollection

# Experiments are feature flags to enable and disable certain unstable SDK
# functionality. Changing them from the defaults (`None`) in production
Expand Down Expand Up @@ -1272,6 +1273,7 @@ def __init__(
transport_queue_size: int = DEFAULT_QUEUE_SIZE,
sample_rate: float = 1.0,
send_default_pii: "Optional[bool]" = None,
data_collection: "Optional[Union[DataCollection, Dict[str, Any]]]" = None,
http_proxy: "Optional[str]" = None,
https_proxy: "Optional[str]" = None,
ignore_errors: "Sequence[Union[type, str]]" = [], # noqa: B006
Expand Down Expand Up @@ -1426,6 +1428,26 @@ def __init__(
If you enable this option, be sure to manually remove what you don't want to send using our features for
managing `Sensitive Data <https://docs.sentry.io/data-management/sensitive-data/>`_.

.. deprecated::
Use `data_collection` instead. `send_default_pii` is still honored when `data_collection` is not set.

:param data_collection: Structured configuration controlling what data integrations collect automatically,
superseding `send_default_pii`. Pass a dict or a :class:`sentry_sdk.DataCollection` instance to enable or
restrict collection per category (user identity, cookies, HTTP headers/bodies, query params, generative AI
inputs/outputs, stack frame variables, source context).

When `data_collection` is set, omitted fields use their defaults (most categories are collected, with the
sensitive denylist scrubbing values). When it is not set, the SDK derives behavior from `send_default_pii`
so that upgrading without configuring `data_collection` changes nothing. If both are set, `data_collection`
takes precedence.

Example::

sentry_sdk.init(
dsn="...",
data_collection={"user_info": False, "http_bodies": []},
)

:param event_scrubber: Scrubs the event payload for sensitive information such as cookies, sessions, and
passwords from a `denylist`.

Expand Down
Loading
Loading