From a91f6c43647fbdb1c4166140fdd94e0093729534 Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Fri, 26 Jun 2026 13:16:42 -0400 Subject: [PATCH] feat(data-collection): gate HTTP request data collection --- sentry_sdk/integrations/_asgi_common.py | 48 +++++++++++-- sentry_sdk/integrations/_wsgi_common.py | 84 ++++++++++++++++++++-- sentry_sdk/integrations/aiohttp.py | 40 ++++++++--- sentry_sdk/integrations/asgi.py | 4 +- sentry_sdk/integrations/aws_lambda.py | 30 ++++++-- sentry_sdk/integrations/django/__init__.py | 6 +- sentry_sdk/integrations/django/asgi.py | 4 +- sentry_sdk/integrations/fastapi.py | 12 +++- sentry_sdk/integrations/flask.py | 6 +- sentry_sdk/integrations/gcp.py | 24 +++++-- sentry_sdk/integrations/litestar.py | 13 +++- sentry_sdk/integrations/pyramid.py | 8 ++- sentry_sdk/integrations/quart.py | 42 ++++++++--- sentry_sdk/integrations/sanic.py | 21 ++++-- sentry_sdk/integrations/starlette.py | 19 +++-- sentry_sdk/integrations/starlite.py | 13 +++- sentry_sdk/integrations/tornado.py | 23 ++++-- sentry_sdk/integrations/wsgi.py | 38 +++++++--- 18 files changed, 350 insertions(+), 85 deletions(-) diff --git a/sentry_sdk/integrations/_asgi_common.py b/sentry_sdk/integrations/_asgi_common.py index eda75a2926..81b6aac238 100644 --- a/sentry_sdk/integrations/_asgi_common.py +++ b/sentry_sdk/integrations/_asgi_common.py @@ -1,8 +1,10 @@ import urllib from typing import TYPE_CHECKING +import sentry_sdk +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations._wsgi_common import _filter_headers -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info, should_send_default_pii if TYPE_CHECKING: from typing import Any, Dict, Optional, Union @@ -93,14 +95,29 @@ def _get_request_data(asgi_scope: "Any") -> "Dict[str, Any]": request_data["headers"] = headers = _filter_headers( _get_headers(asgi_scope), ) - request_data["query_string"] = _get_query(asgi_scope) + + # Event request.query_string is set unconditionally in legacy mode. When + # data_collection is set explicitly, the query_params behavior governs + # whether/how it is collected. + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + raw_query = _get_query(asgi_scope) + scrubbed_query = ( + scrub_query_string(raw_query, dc.query_params) + if raw_query is not None + else None + ) + if scrubbed_query is not None: + request_data["query_string"] = scrubbed_query + else: + request_data["query_string"] = _get_query(asgi_scope) request_data["url"] = _get_url( asgi_scope, "http" if ty == "http" else "ws", headers.get("host") ) client = asgi_scope.get("client") - if client and should_send_default_pii(): + if client and should_collect_user_info(): request_data["env"] = {"REMOTE_ADDR": _get_ip(asgi_scope)} return request_data @@ -121,7 +138,28 @@ def _get_request_attributes(asgi_scope: "Any") -> "dict[str, Any]": for header, value in headers.items(): attributes[f"http.request.header.{header.lower()}"] = value - if should_send_default_pii(): + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + url_without_query_string = _get_url( + asgi_scope, "http" if ty == "http" else "ws", headers.get("host") + ) + raw_query = _get_query(asgi_scope) + scrubbed_query = ( + scrub_query_string(raw_query, dc.query_params) + if raw_query is not None + else None + ) + if scrubbed_query is not None: + attributes["http.query"] = scrubbed_query + attributes["url.full"] = f"{url_without_query_string}?{scrubbed_query}" + else: + attributes["url.full"] = url_without_query_string + # url.path never contains a query string, so it is unaffected by + # query_params and is collected as technical context. + attributes["url.path"] = asgi_scope.get("root_path", "") + asgi_scope.get( + "path", "" + ) + elif should_send_default_pii(): query = _get_query(asgi_scope) if query: attributes["http.query"] = query @@ -140,7 +178,7 @@ def _get_request_attributes(asgi_scope: "Any") -> "dict[str, Any]": ) client = asgi_scope.get("client") - if client and should_send_default_pii(): + if client and should_collect_user_info(): ip = _get_ip(asgi_scope) attributes["client.address"] = ip diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index cf1a365209..ff08a96e8f 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -4,6 +4,14 @@ import sentry_sdk from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE +from sentry_sdk.data_collection import ( + BODY_TYPE_INCOMING_REQUEST, + COLLECTION_OFF, + apply_key_value_collection, + filter_request_headers, + scrub_query_string, + should_collect_body_type, +) from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import AnnotatedValue, logger @@ -90,15 +98,34 @@ def extract_into_event(self, event: "Event") -> None: if not client.is_active(): return + dc = client.data_collection + data: "Optional[Union[AnnotatedValue, Dict[str, Any]]]" = None content_length = self.content_length() request_info = event.get("request", {}) - if should_send_default_pii(): + # Cookies. When data_collection is set explicitly, collect according to + # the cookies behavior (default denyList scrubs sensitive cookie values); + # otherwise fall back to the legacy send_default_pii gate. + if dc.explicit: + if dc.cookies.mode != COLLECTION_OFF: + request_info["cookies"] = apply_key_value_collection( + dict(self.cookies()), dc.cookies + ) + elif should_send_default_pii(): request_info["cookies"] = dict(self.cookies()) - if not request_body_within_bounds(client, content_length): + # Request body. When data_collection is set explicitly, only collect the + # incoming request body if that body type is enabled; size is still + # bounded by max_request_body_size. + collect_body = True + if dc.explicit: + collect_body = should_collect_body_type(dc, BODY_TYPE_INCOMING_REQUEST) + + if not collect_body: + data = None + elif not request_body_within_bounds(client, content_length): data = AnnotatedValue.removed_because_over_size_limit() else: # First read the raw body data @@ -213,21 +240,68 @@ def _filter_headers( headers: "Mapping[str, str]", use_annotated_value: bool = True, ) -> "Mapping[str, Union[AnnotatedValue, str]]": - if should_send_default_pii(): - return headers - substitute: "Union[AnnotatedValue, str]" = ( SENSITIVE_DATA_SUBSTITUTE if not use_annotated_value else AnnotatedValue.removed_because_over_size_limit() ) + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + # Apply the configured request-header collection behavior (default + # denyList scrubs sensitive header values; the raw Cookie/Set-Cookie + # header is always filtered). + return filter_request_headers( + headers, dc.http_headers.request, substitute=substitute + ) + + # Legacy behavior (data_collection not set explicitly). + if should_send_default_pii(): + return headers + return { k: (v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS else substitute) for k, v in headers.items() } +def collect_query_string( + raw_query_string: "Optional[str]", +) -> "Optional[str]": + """ + Return the (possibly scrubbed) query string to attach to span attributes + (``http.query`` / ``url.query`` / the query portion of ``url.full``), or + ``None`` if the query string should not be collected. + + When ``data_collection`` is set explicitly, the ``query_params`` behavior + governs collection/scrubbing. Otherwise the legacy ``send_default_pii`` gate + applies (preserving current behavior). + """ + if not raw_query_string: + return None + + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + return scrub_query_string(raw_query_string, dc.query_params) + + if should_send_default_pii(): + return raw_query_string + return None + + +def should_collect_url() -> bool: + """ + Whether to collect non-query URL attributes (``url.full`` base and + ``url.path``). These never contain query strings, so they are treated as + technical context and collected whenever ``data_collection`` is set + explicitly. Otherwise the legacy ``send_default_pii`` gate applies. + """ + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + return True + return should_send_default_pii() + + def _in_http_status_code_range( code: object, code_ranges: "list[HttpStatusCodeRange]" ) -> bool: diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index d22f3a745b..cfb4cd33f2 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -5,6 +5,7 @@ import sentry_sdk from sentry_sdk.api import continue_trace from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations import ( _DEFAULT_FAILED_REQUEST_STATUS_CODES, DidNotEnable, @@ -13,10 +14,12 @@ ) from sentry_sdk.integrations._wsgi_common import ( _filter_headers, + collect_query_string, request_body_within_bounds, + should_collect_url, ) from sentry_sdk.integrations.logging import ignore_logger -from sentry_sdk.scope import Scope, should_send_default_pii +from sentry_sdk.scope import Scope, should_collect_user_info from sentry_sdk.sessions import track_session from sentry_sdk.traces import ( SOURCE_FOR_STYLE as SEGMENT_SOURCE_FOR_STYLE, @@ -159,8 +162,8 @@ async def sentry_app_handle( header_value ) - url_attributes = {} - if should_send_default_pii(): + url_attributes: "dict[str, Any]" = {} + if should_collect_url(): url_attributes["url.full"] = "%s://%s%s" % ( request.scheme, request.host, @@ -168,11 +171,12 @@ async def sentry_app_handle( ) url_attributes["url.path"] = request.path - if request.query_string: - url_attributes["url.query"] = request.query_string + query = collect_query_string(request.query_string) + if query: + url_attributes["url.query"] = query client_address_attributes = {} - if should_send_default_pii() and request.remote: + if should_collect_user_info() and request.remote: client_address_attributes["client.address"] = request.remote scope.set_attribute( SPANDATA.USER_IP_ADDRESS, request.remote @@ -358,15 +362,18 @@ async def on_request_start( "sentry.origin": AioHttpIntegration.origin, "http.request.method": method, } - if parsed_url is not None and should_send_default_pii(): + if parsed_url is not None and should_collect_url(): attributes["url.full"] = parsed_url.url attributes["url.path"] = params.url.path - if parsed_url.query: - attributes["url.query"] = parsed_url.query if parsed_url.fragment: attributes["url.fragment"] = parsed_url.fragment + if parsed_url is not None: + query = collect_query_string(parsed_url.query) + if query: + attributes["url.query"] = query + span = sentry_sdk.traces.start_span(name=span_name, attributes=attributes) else: legacy_span = sentry_sdk.start_span( @@ -458,7 +465,20 @@ def aiohttp_processor( request.path, ) - request_info["query_string"] = request.query_string + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + query_string = request.query_string + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + scrubbed_qs = ( + scrub_query_string(query_string, dc.query_params) + if query_string + else None + ) + if scrubbed_qs is not None: + request_info["query_string"] = scrubbed_qs + else: + request_info["query_string"] = query_string request_info["method"] = request.method request_info["env"] = {"REMOTE_ADDR": request.remote} request_info["headers"] = _filter_headers(dict(request.headers)) diff --git a/sentry_sdk/integrations/asgi.py b/sentry_sdk/integrations/asgi.py index f0470e33fc..63f82af3e8 100644 --- a/sentry_sdk/integrations/asgi.py +++ b/sentry_sdk/integrations/asgi.py @@ -24,7 +24,7 @@ DEFAULT_HTTP_METHODS_TO_CAPTURE, nullcontext, ) -from sentry_sdk.scope import Scope, should_send_default_pii +from sentry_sdk.scope import Scope, should_collect_user_info from sentry_sdk.sessions import track_session from sentry_sdk.traces import ( SOURCE_FOR_STYLE as SEGMENT_SOURCE_FOR_STYLE, @@ -248,7 +248,7 @@ async def _run_app( "network.protocol.name": ty, } - if scope.get("client") and should_send_default_pii(): + if scope.get("client") and should_collect_user_info(): sentry_scope.set_attribute( SPANDATA.USER_IP_ADDRESS, _get_ip(scope) ) diff --git a/sentry_sdk/integrations/aws_lambda.py b/sentry_sdk/integrations/aws_lambda.py index c7fe77714a..6aca8f9e94 100644 --- a/sentry_sdk/integrations/aws_lambda.py +++ b/sentry_sdk/integrations/aws_lambda.py @@ -11,13 +11,18 @@ import sentry_sdk from sentry_sdk.api import continue_trace from sentry_sdk.consts import OP +from sentry_sdk.data_collection import COLLECTION_OFF, apply_key_value_collection from sentry_sdk.integrations import Integration -from sentry_sdk.integrations._wsgi_common import _filter_headers +from sentry_sdk.integrations._wsgi_common import _filter_headers, collect_query_string from sentry_sdk.integrations.cloud_resource_context import ( CLOUD_PLATFORM, CLOUD_PROVIDER, ) -from sentry_sdk.scope import Scope, should_send_default_pii +from sentry_sdk.scope import ( + Scope, + should_collect_user_info, + should_send_default_pii, +) from sentry_sdk.traces import SegmentSource from sentry_sdk.tracing import TransactionSource from sentry_sdk.tracing_utils import has_span_streaming_enabled @@ -164,10 +169,12 @@ def sentry_handler( "httpMethod" ] - if should_send_default_pii() and "queryStringParameters" in request_data: + if "queryStringParameters" in request_data: qs = request_data["queryStringParameters"] if qs: - additional_attributes["url.query"] = urlencode(qs) + query_string = collect_query_string(urlencode(qs)) + if query_string: + additional_attributes["url.query"] = query_string sampling_context = { "aws_event": aws_event, @@ -409,12 +416,22 @@ def event_processor( request["url"] = _get_url(aws_event, aws_context) if "queryStringParameters" in aws_event: - request["query_string"] = aws_event["queryStringParameters"] + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + qs = aws_event["queryStringParameters"] + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + if qs and dc.query_params.mode != COLLECTION_OFF: + request["query_string"] = apply_key_value_collection( + qs, dc.query_params + ) + else: + request["query_string"] = qs if "headers" in aws_event: request["headers"] = _filter_headers(aws_event["headers"]) - if should_send_default_pii(): + if should_collect_user_info(): user_info = sentry_event.setdefault("user", {}) identity = aws_event.get("identity") @@ -429,6 +446,7 @@ def event_processor( if ip is not None: user_info.setdefault("ip_address", ip) + if should_send_default_pii(): if "body" in aws_event: request["data"] = aws_event.get("body", "") else: diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index 361b60079d..ba9a9843cd 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -13,7 +13,7 @@ ) from sentry_sdk.integrations.logging import ignore_logger from sentry_sdk.integrations.wsgi import SentryWsgiMiddleware -from sentry_sdk.scope import add_global_event_processor, should_send_default_pii +from sentry_sdk.scope import add_global_event_processor, should_collect_user_info from sentry_sdk.serializer import add_global_repr_processor, add_repr_sequence_type from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing import SOURCE_FOR_STYLE, TransactionSource @@ -468,7 +468,7 @@ def _after_get_response(request: "WSGIRequest") -> None: _attempt_resolve_again(request, scope, integration.transaction_style) span_streaming = has_span_streaming_enabled(client.options) - if span_streaming and should_send_default_pii(): + if span_streaming and should_collect_user_info(): user = getattr(request, "user", None) # Evaluating a SimpleLazyObject in an async view can raise django.core.exceptions.SynchronousOnlyOperation. @@ -544,7 +544,7 @@ def wsgi_request_event_processor(event: "Event", hint: "dict[str, Any]") -> "Eve with capture_internal_exceptions(): DjangoRequestExtractor(request).extract_into_event(event) - if should_send_default_pii(): + if should_collect_user_info(): with capture_internal_exceptions(): _set_user_info(request, event) diff --git a/sentry_sdk/integrations/django/asgi.py b/sentry_sdk/integrations/django/asgi.py index 43faffb5be..5163035b23 100644 --- a/sentry_sdk/integrations/django/asgi.py +++ b/sentry_sdk/integrations/django/asgi.py @@ -16,7 +16,7 @@ import sentry_sdk from sentry_sdk.consts import OP from sentry_sdk.integrations.asgi import SentryAsgiMiddleware -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import ( @@ -70,7 +70,7 @@ def asgi_request_event_processor(event: "Event", hint: "dict[str, Any]") -> "Eve with capture_internal_exceptions(): DjangoRequestExtractor(request).extract_into_event(event) - if should_send_default_pii(): + if should_collect_user_info(): with capture_internal_exceptions(): _set_user_info(request, event) diff --git a/sentry_sdk/integrations/fastapi.py b/sentry_sdk/integrations/fastapi.py index c7b97c88b1..d2033c84d0 100644 --- a/sentry_sdk/integrations/fastapi.py +++ b/sentry_sdk/integrations/fastapi.py @@ -5,6 +5,7 @@ import sentry_sdk from sentry_sdk.consts import SPANDATA +from sentry_sdk.data_collection import COLLECTION_OFF, apply_key_value_collection from sentry_sdk.integrations import DidNotEnable from sentry_sdk.scope import should_send_default_pii from sentry_sdk.traces import StreamedSpan, get_current_span @@ -118,8 +119,15 @@ def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event": # Extract information from request request_info = event.get("request", {}) if info: - if "cookies" in info and should_send_default_pii(): - request_info["cookies"] = info["cookies"] + if "cookies" in info: + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + if dc.cookies.mode != COLLECTION_OFF: + request_info["cookies"] = apply_key_value_collection( + info["cookies"], dc.cookies + ) + elif should_send_default_pii(): + request_info["cookies"] = info["cookies"] if "data" in info: request_info["data"] = info["data"] event["request"] = deepcopy(request_info) diff --git a/sentry_sdk/integrations/flask.py b/sentry_sdk/integrations/flask.py index 1902091fbf..570a618cd8 100644 --- a/sentry_sdk/integrations/flask.py +++ b/sentry_sdk/integrations/flask.py @@ -7,7 +7,7 @@ RequestExtractor, ) from sentry_sdk.integrations.wsgi import SentryWsgiMiddleware -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info from sentry_sdk.tracing import SOURCE_FOR_STYLE from sentry_sdk.utils import ( capture_internal_exceptions, @@ -156,7 +156,7 @@ def _request_started(app: "Flask", **kwargs: "Any") -> None: scope = sentry_sdk.get_isolation_scope() - if should_send_default_pii(): + if should_collect_user_info(): with capture_internal_exceptions(): user_properties = _get_flask_user_properties() if user_properties: @@ -208,7 +208,7 @@ def inner(event: "Event", hint: "dict[str, Any]") -> "Event": with capture_internal_exceptions(): FlaskRequestExtractor(request).extract_into_event(event) - if should_send_default_pii(): + if should_collect_user_info(): with capture_internal_exceptions(): _add_user_to_event(event) diff --git a/sentry_sdk/integrations/gcp.py b/sentry_sdk/integrations/gcp.py index 91a62b3a81..642adb41c4 100644 --- a/sentry_sdk/integrations/gcp.py +++ b/sentry_sdk/integrations/gcp.py @@ -8,8 +8,9 @@ import sentry_sdk from sentry_sdk.api import continue_trace from sentry_sdk.consts import OP +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations import Integration -from sentry_sdk.integrations._wsgi_common import _filter_headers +from sentry_sdk.integrations._wsgi_common import _filter_headers, collect_query_string from sentry_sdk.integrations.cloud_resource_context import CLOUD_PROVIDER from sentry_sdk.scope import Scope, should_send_default_pii from sentry_sdk.traces import SegmentSource @@ -100,10 +101,12 @@ def sentry_func( if hasattr(gcp_event, "method"): additional_attributes["http.request.method"] = gcp_event.method - if should_send_default_pii() and hasattr(gcp_event, "query_string"): - additional_attributes["url.query"] = gcp_event.query_string.decode( - "utf-8", errors="replace" + if hasattr(gcp_event, "query_string"): + query_string = collect_query_string( + gcp_event.query_string.decode("utf-8", errors="replace") ) + if query_string: + additional_attributes["url.query"] = query_string sampling_context = { "gcp_env": { @@ -235,9 +238,16 @@ def event_processor(event: "Event", hint: "Hint") -> "Optional[Event]": request["method"] = gcp_event.method if hasattr(gcp_event, "query_string"): - request["query_string"] = gcp_event.query_string.decode( - "utf-8", errors="replace" - ) + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + qs = gcp_event.query_string.decode("utf-8", errors="replace") + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + scrubbed_qs = scrub_query_string(qs, dc.query_params) if qs else None + if scrubbed_qs is not None: + request["query_string"] = scrubbed_qs + else: + request["query_string"] = qs if hasattr(gcp_event, "headers"): request["headers"] = _filter_headers(gcp_event.headers) diff --git a/sentry_sdk/integrations/litestar.py b/sentry_sdk/integrations/litestar.py index f0c90a7921..95972b64b9 100644 --- a/sentry_sdk/integrations/litestar.py +++ b/sentry_sdk/integrations/litestar.py @@ -3,6 +3,7 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.data_collection import COLLECTION_OFF, apply_key_value_collection from sentry_sdk.integrations import ( _DEFAULT_FAILED_REQUEST_STATUS_CODES, DidNotEnable, @@ -10,7 +11,7 @@ ) from sentry_sdk.integrations.asgi import SentryAsgiMiddleware from sentry_sdk.integrations.logging import ignore_logger -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info, should_send_default_pii from sentry_sdk.tracing import SOURCE_FOR_STYLE, TransactionSource from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import ( @@ -312,7 +313,13 @@ async def handle_wrapper( def event_processor(event: "Event", _: "Hint") -> "Event": request_info = event.get("request", {}) request_info["content_length"] = len(scope.get("_body", b"")) - if should_send_default_pii(): + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + if dc.cookies.mode != COLLECTION_OFF: + request_info["cookies"] = apply_key_value_collection( + dict(extracted_request_data["cookies"]), dc.cookies + ) + elif should_send_default_pii(): request_info["cookies"] = extracted_request_data["cookies"] if request_data is not None: request_info["data"] = request_data @@ -341,7 +348,7 @@ def retrieve_user_from_scope(scope: "LitestarScope") -> "Optional[dict[str, Any] @ensure_integration_enabled(LitestarIntegration) def exception_handler(exc: Exception, scope: "LitestarScope") -> None: user_info: "Optional[dict[str, Any]]" = None - if should_send_default_pii(): + if should_collect_user_info(): user_info = retrieve_user_from_scope(scope) if user_info and isinstance(user_info, dict): sentry_scope = sentry_sdk.get_isolation_scope() diff --git a/sentry_sdk/integrations/pyramid.py b/sentry_sdk/integrations/pyramid.py index 6837d8345c..0e733c316d 100644 --- a/sentry_sdk/integrations/pyramid.py +++ b/sentry_sdk/integrations/pyramid.py @@ -7,7 +7,7 @@ from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.integrations._wsgi_common import RequestExtractor from sentry_sdk.integrations.wsgi import SentryWsgiMiddleware -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info from sentry_sdk.traces import SOURCE_FOR_STYLE as SEGMENT_SOURCE_FOR_STYLE from sentry_sdk.tracing import SOURCE_FOR_STYLE as TRANSACTION_SOURCE_FOR_STYLE from sentry_sdk.tracing_utils import has_span_streaming_enabled @@ -86,7 +86,9 @@ def sentry_patched_call_view( scope = sentry_sdk.get_isolation_scope() - if should_send_default_pii() and has_span_streaming_enabled(client.options): + if should_collect_user_info() and has_span_streaming_enabled( + client.options + ): user_id = authenticated_userid(request) if user_id: scope.set_user({"id": user_id}) @@ -229,7 +231,7 @@ def pyramid_event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event": with capture_internal_exceptions(): PyramidRequestExtractor(request).extract_into_event(event) - if should_send_default_pii(): + if should_collect_user_info(): with capture_internal_exceptions(): user_info = event.setdefault("user", {}) user_info.setdefault("id", authenticated_userid(request)) diff --git a/sentry_sdk/integrations/quart.py b/sentry_sdk/integrations/quart.py index 6a5603d825..fa11c0bb55 100644 --- a/sentry_sdk/integrations/quart.py +++ b/sentry_sdk/integrations/quart.py @@ -5,10 +5,15 @@ from typing import TYPE_CHECKING import sentry_sdk +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations import DidNotEnable, Integration -from sentry_sdk.integrations._wsgi_common import _filter_headers +from sentry_sdk.integrations._wsgi_common import ( + _filter_headers, + collect_query_string, + should_collect_url, +) from sentry_sdk.integrations.asgi import SentryAsgiMiddleware -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info from sentry_sdk.traces import SOURCE_FOR_STYLE as SEGMENT_SOURCE_FOR_STYLE from sentry_sdk.traces import StreamedSpan, get_current_span from sentry_sdk.tracing import SOURCE_FOR_STYLE as TRANSACTION_SOURCE_FOR_STYLE @@ -203,13 +208,16 @@ async def _request_websocket_started(app: "Quart", **kwargs: "Any") -> None: segment.set_attributes(header_attributes) - if should_send_default_pii(): + if should_collect_url(): segment.set_attribute("url.full", request_websocket.url) - segment.set_attribute( - "url.query", - request_websocket.query_string.decode("utf-8", errors="replace"), - ) + query_string = collect_query_string( + request_websocket.query_string.decode("utf-8", errors="replace") + ) + if query_string: + segment.set_attribute("url.query", query_string) + + if should_collect_user_info(): user_properties = {} if len(request_websocket.access_route) >= 1: segment.set_attribute( @@ -245,11 +253,27 @@ def inner(event: "Event", hint: "dict[str, Any]") -> "Event": request_info = event.setdefault("request", {}) request_info["url"] = request.url - request_info["query_string"] = request.query_string + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + query_string = request.query_string + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + scrubbed_qs = ( + scrub_query_string( + query_string.decode("utf-8", errors="replace"), + dc.query_params, + ) + if query_string + else None + ) + if scrubbed_qs is not None: + request_info["query_string"] = scrubbed_qs + else: + request_info["query_string"] = query_string request_info["method"] = request.method request_info["headers"] = _filter_headers(dict(request.headers)) - if should_send_default_pii(): + if should_collect_user_info(): if len(request.access_route) >= 1: request_info["env"] = {"REMOTE_ADDR": request.access_route[0]} diff --git a/sentry_sdk/integrations/sanic.py b/sentry_sdk/integrations/sanic.py index 468a1323a8..02d59a3571 100644 --- a/sentry_sdk/integrations/sanic.py +++ b/sentry_sdk/integrations/sanic.py @@ -8,10 +8,11 @@ import sentry_sdk from sentry_sdk import continue_trace from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations import DidNotEnable, Integration, _check_minimum_version from sentry_sdk.integrations._wsgi_common import RequestExtractor, _filter_headers from sentry_sdk.integrations.logging import ignore_logger -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info from sentry_sdk.traces import SegmentSource, StreamedSpan from sentry_sdk.tracing import TransactionSource from sentry_sdk.tracing_utils import has_span_streaming_enabled @@ -192,7 +193,7 @@ async def _context_enter(request: "Request") -> None: sentry_sdk.traces.continue_trace(dict(request.headers)) scope.set_custom_sampling_context({"sanic_request": request}) - if should_send_default_pii() and request.remote_addr: + if should_collect_user_info() and request.remote_addr: scope.set_attribute(SPANDATA.USER_IP_ADDRESS, request.remote_addr) span = sentry_sdk.traces.start_span( @@ -388,7 +389,7 @@ def _get_request_attributes(request: "Request") -> "Dict[str, Any]": if urlparts.scheme: attributes[SPANDATA.NETWORK_PROTOCOL_NAME] = urlparts.scheme - if should_send_default_pii() and request.remote_addr: + if should_collect_user_info() and request.remote_addr: attributes[SPANDATA.CLIENT_ADDRESS] = request.remote_addr return attributes @@ -419,7 +420,19 @@ def sanic_processor(event: "Event", hint: "Optional[Hint]") -> "Optional[Event]" urlparts.path, ) - request_info["query_string"] = urlparts.query + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + scrubbed_qs = ( + scrub_query_string(urlparts.query, dc.query_params) + if urlparts.query + else None + ) + if scrubbed_qs is not None: + request_info["query_string"] = scrubbed_qs + else: + request_info["query_string"] = urlparts.query request_info["method"] = request.method request_info["env"] = {"REMOTE_ADDR": request.remote_addr} request_info["headers"] = _filter_headers(dict(request.headers)) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 1482efc25b..7cf2bd83c0 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -10,6 +10,7 @@ import sentry_sdk from sentry_sdk._types import OVER_SIZE_LIMIT_SUBSTITUTE from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.data_collection import COLLECTION_OFF, apply_key_value_collection from sentry_sdk.integrations import ( _DEFAULT_FAILED_REQUEST_STATUS_CODES, DidNotEnable, @@ -22,7 +23,7 @@ request_body_within_bounds, ) from sentry_sdk.integrations.asgi import SentryAsgiMiddleware -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info, should_send_default_pii from sentry_sdk.traces import StreamedSpan, get_current_span from sentry_sdk.tracing import ( SOURCE_FOR_STYLE, @@ -354,7 +355,7 @@ def _add_user_to_sentry_scope(scope: "Dict[str, Any]") -> None: if "user" not in scope: return - if not should_send_default_pii(): + if not should_collect_user_info(): return user_info: "Dict[str, Any]" = {} @@ -703,7 +704,11 @@ def extract_cookies_from_request( self: "StarletteRequestExtractor", ) -> "Optional[Dict[str, Any]]": cookies: "Optional[Dict[str, Any]]" = None - if should_send_default_pii(): + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + if dc.cookies.mode != COLLECTION_OFF: + cookies = apply_key_value_collection(dict(self.cookies()), dc.cookies) + elif should_send_default_pii(): cookies = self.cookies() return cookies @@ -717,7 +722,13 @@ async def extract_request_info( with capture_internal_exceptions(): # Add cookies - if should_send_default_pii(): + dc = client.data_collection + if dc.explicit: + if dc.cookies.mode != COLLECTION_OFF: + request_info["cookies"] = apply_key_value_collection( + dict(self.cookies()), dc.cookies + ) + elif should_send_default_pii(): request_info["cookies"] = self.cookies() # If there is no body, just return the cookies diff --git a/sentry_sdk/integrations/starlite.py b/sentry_sdk/integrations/starlite.py index 1c9328a09d..f0f666a961 100644 --- a/sentry_sdk/integrations/starlite.py +++ b/sentry_sdk/integrations/starlite.py @@ -2,9 +2,10 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.data_collection import COLLECTION_OFF, apply_key_value_collection from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.integrations.asgi import SentryAsgiMiddleware -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info, should_send_default_pii from sentry_sdk.tracing import SOURCE_FOR_STYLE, TransactionSource from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import ( @@ -262,7 +263,13 @@ async def handle_wrapper( def event_processor(event: "Event", _: "Hint") -> "Event": request_info = event.get("request", {}) request_info["content_length"] = len(scope.get("_body", b"")) - if should_send_default_pii(): + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + if dc.cookies.mode != COLLECTION_OFF: + request_info["cookies"] = apply_key_value_collection( + dict(extracted_request_data["cookies"]), dc.cookies + ) + elif should_send_default_pii(): request_info["cookies"] = extracted_request_data["cookies"] if request_data is not None: request_info["data"] = request_data @@ -299,7 +306,7 @@ def retrieve_user_from_scope(scope: "StarliteScope") -> "Optional[dict[str, Any] @ensure_integration_enabled(StarliteIntegration) def exception_handler(exc: Exception, scope: "StarliteScope", _: "State") -> None: user_info: "Optional[dict[str, Any]]" = None - if should_send_default_pii(): + if should_collect_user_info(): user_info = retrieve_user_from_scope(scope) if user_info and isinstance(user_info, dict): sentry_scope = sentry_sdk.get_isolation_scope() diff --git a/sentry_sdk/integrations/tornado.py b/sentry_sdk/integrations/tornado.py index 0e0d465dd7..6bd90bdc46 100644 --- a/sentry_sdk/integrations/tornado.py +++ b/sentry_sdk/integrations/tornado.py @@ -5,6 +5,7 @@ import sentry_sdk from sentry_sdk.api import continue_trace from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations import DidNotEnable, Integration, _check_minimum_version from sentry_sdk.integrations._wsgi_common import ( RequestExtractor, @@ -13,7 +14,7 @@ request_body_within_bounds, ) from sentry_sdk.integrations.logging import ignore_logger -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.scope import should_collect_user_info from sentry_sdk.traces import SegmentSource, StreamedSpan from sentry_sdk.tracing import TransactionSource from sentry_sdk.tracing_utils import has_span_streaming_enabled @@ -130,7 +131,7 @@ def _handle_request_impl(self: "RequestHandler") -> "Generator[None, None, None] sentry_sdk.traces.continue_trace(dict(headers)) scope.set_custom_sampling_context({"tornado_request": self.request}) - if should_send_default_pii() and self.request.remote_ip: + if should_collect_user_info() and self.request.remote_ip: scope.set_attribute(SPANDATA.USER_IP_ADDRESS, self.request.remote_ip) span_ctx = sentry_sdk.traces.start_span( @@ -205,7 +206,7 @@ def _get_request_attributes(request: "Any") -> "Dict[str, Any]": if request.protocol: attributes[SPANDATA.NETWORK_PROTOCOL_NAME] = request.protocol - if should_send_default_pii() and request.remote_ip: + if should_collect_user_info() and request.remote_ip: attributes[SPANDATA.CLIENT_ADDRESS] = request.remote_ip with capture_internal_exceptions(): @@ -271,12 +272,24 @@ def tornado_processor(event: "Event", hint: "dict[str, Any]") -> "Event": request.path, ) - request_info["query_string"] = request.query + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + scrubbed_qs = ( + scrub_query_string(request.query, dc.query_params) + if request.query + else None + ) + if scrubbed_qs is not None: + request_info["query_string"] = scrubbed_qs + else: + request_info["query_string"] = request.query request_info["method"] = request.method request_info["env"] = {"REMOTE_ADDR": request.remote_ip} request_info["headers"] = _filter_headers(dict(request.headers)) - if should_send_default_pii(): + if should_collect_user_info(): try: current_user = handler.current_user except Exception: diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index e776ed915a..4a6b92dc0e 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py @@ -6,12 +6,19 @@ from sentry_sdk._werkzeug import _get_headers, get_host from sentry_sdk.api import continue_trace from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.data_collection import scrub_query_string from sentry_sdk.integrations._wsgi_common import ( DEFAULT_HTTP_METHODS_TO_CAPTURE, _filter_headers, + collect_query_string, nullcontext, + should_collect_url, +) +from sentry_sdk.scope import ( + Scope, + should_collect_user_info, + use_isolation_scope, ) -from sentry_sdk.scope import Scope, should_send_default_pii, use_isolation_scope from sentry_sdk.sessions import track_session from sentry_sdk.traces import SegmentSource, StreamedSpan from sentry_sdk.tracing import Span, TransactionSource @@ -134,7 +141,7 @@ def __call__( ) Scope.set_custom_sampling_context({"wsgi_environ": environ}) - if should_send_default_pii(): + if should_collect_user_info(): client_ip = get_client_ip(environ) if client_ip: scope.set_attribute( @@ -241,7 +248,7 @@ def _get_environ(environ: "Dict[str, str]") -> "Iterator[Tuple[str, str]]": capture (server name, port and remote addr if pii is enabled). """ keys = ["SERVER_NAME", "SERVER_PORT"] - if should_send_default_pii(): + if should_collect_user_info(): # make debugging of proxy setup easier. Proxy headers are # in headers. keys += ["REMOTE_ADDR"] @@ -361,13 +368,25 @@ def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event": # if the code below fails halfway through we at least have some data request_info = event.setdefault("request", {}) - if should_send_default_pii(): + if should_collect_user_info(): user_info = event.setdefault("user", {}) if client_ip: user_info.setdefault("ip_address", client_ip) request_info["url"] = request_url - request_info["query_string"] = query_string + # Event request.query_string is set unconditionally in legacy mode; + # when data_collection is explicit it is governed by query_params. + dc = sentry_sdk.get_client().data_collection + if dc.explicit: + scrubbed_qs = ( + scrub_query_string(query_string, dc.query_params) + if query_string + else None + ) + if scrubbed_qs is not None: + request_info["query_string"] = scrubbed_qs + else: + request_info["query_string"] = query_string request_info["method"] = method request_info["env"] = env request_info["headers"] = headers @@ -409,15 +428,16 @@ def _get_request_attributes( except ValueError: pass - if should_send_default_pii(): + if should_collect_user_info(): client_ip = get_client_ip(environ) if client_ip: attributes["client.address"] = client_ip - query_string = environ.get("QUERY_STRING") - if query_string: - attributes["http.query"] = query_string + query_string = collect_query_string(environ.get("QUERY_STRING")) + if query_string: + attributes["http.query"] = query_string + if should_collect_url(): path = environ.get("PATH_INFO", "") if path: attributes["url.path"] = path