Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions sentry_sdk/integrations/_asgi_common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import urllib
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk.data_collection import scrub_query_string
from sentry_sdk.integrations._wsgi_common import _filter_headers
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.scope import should_collect_user_info, should_send_default_pii

if TYPE_CHECKING:
from typing import Any, Dict, Optional, Union
Expand Down Expand Up @@ -93,14 +95,29 @@ def _get_request_data(asgi_scope: "Any") -> "Dict[str, Any]":
request_data["headers"] = headers = _filter_headers(
_get_headers(asgi_scope),
)
request_data["query_string"] = _get_query(asgi_scope)

# Event request.query_string is set unconditionally in legacy mode. When
# data_collection is set explicitly, the query_params behavior governs
# whether/how it is collected.
dc = sentry_sdk.get_client().data_collection
if dc.explicit:
raw_query = _get_query(asgi_scope)
scrubbed_query = (
scrub_query_string(raw_query, dc.query_params)
if raw_query is not None
else None
)
if scrubbed_query is not None:
request_data["query_string"] = scrubbed_query
else:
request_data["query_string"] = _get_query(asgi_scope)

request_data["url"] = _get_url(
asgi_scope, "http" if ty == "http" else "ws", headers.get("host")
)

client = asgi_scope.get("client")
if client and should_send_default_pii():
if client and should_collect_user_info():
request_data["env"] = {"REMOTE_ADDR": _get_ip(asgi_scope)}

return request_data
Expand All @@ -121,7 +138,28 @@ def _get_request_attributes(asgi_scope: "Any") -> "dict[str, Any]":
for header, value in headers.items():
attributes[f"http.request.header.{header.lower()}"] = value

if should_send_default_pii():
dc = sentry_sdk.get_client().data_collection
if dc.explicit:
url_without_query_string = _get_url(
asgi_scope, "http" if ty == "http" else "ws", headers.get("host")
)
raw_query = _get_query(asgi_scope)
scrubbed_query = (
scrub_query_string(raw_query, dc.query_params)
if raw_query is not None
else None
)
if scrubbed_query is not None:
attributes["http.query"] = scrubbed_query
attributes["url.full"] = f"{url_without_query_string}?{scrubbed_query}"
else:
attributes["url.full"] = url_without_query_string
# url.path never contains a query string, so it is unaffected by
# query_params and is collected as technical context.
attributes["url.path"] = asgi_scope.get("root_path", "") + asgi_scope.get(
"path", ""
)
elif should_send_default_pii():
query = _get_query(asgi_scope)
if query:
attributes["http.query"] = query
Expand All @@ -140,7 +178,7 @@ def _get_request_attributes(asgi_scope: "Any") -> "dict[str, Any]":
)

client = asgi_scope.get("client")
if client and should_send_default_pii():
if client and should_collect_user_info():
ip = _get_ip(asgi_scope)
attributes["client.address"] = ip

Expand Down
84 changes: 79 additions & 5 deletions sentry_sdk/integrations/_wsgi_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@

import sentry_sdk
from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.data_collection import (
BODY_TYPE_INCOMING_REQUEST,
COLLECTION_OFF,
apply_key_value_collection,
filter_request_headers,
scrub_query_string,
should_collect_body_type,
)
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import AnnotatedValue, logger

Expand Down Expand Up @@ -90,15 +98,34 @@ def extract_into_event(self, event: "Event") -> None:
if not client.is_active():
return

dc = client.data_collection

data: "Optional[Union[AnnotatedValue, Dict[str, Any]]]" = None

content_length = self.content_length()
request_info = event.get("request", {})

if should_send_default_pii():
# Cookies. When data_collection is set explicitly, collect according to
# the cookies behavior (default denyList scrubs sensitive cookie values);
# otherwise fall back to the legacy send_default_pii gate.
if dc.explicit:
if dc.cookies.mode != COLLECTION_OFF:
request_info["cookies"] = apply_key_value_collection(
dict(self.cookies()), dc.cookies
)
elif should_send_default_pii():
request_info["cookies"] = dict(self.cookies())

if not request_body_within_bounds(client, content_length):
# Request body. When data_collection is set explicitly, only collect the
# incoming request body if that body type is enabled; size is still
# bounded by max_request_body_size.
collect_body = True
if dc.explicit:
collect_body = should_collect_body_type(dc, BODY_TYPE_INCOMING_REQUEST)

if not collect_body:
data = None
elif not request_body_within_bounds(client, content_length):
data = AnnotatedValue.removed_because_over_size_limit()
else:
# First read the raw body data
Expand Down Expand Up @@ -213,21 +240,68 @@ def _filter_headers(
headers: "Mapping[str, str]",
use_annotated_value: bool = True,
) -> "Mapping[str, Union[AnnotatedValue, str]]":
if should_send_default_pii():
return headers

substitute: "Union[AnnotatedValue, str]" = (
SENSITIVE_DATA_SUBSTITUTE
if not use_annotated_value
else AnnotatedValue.removed_because_over_size_limit()
)

dc = sentry_sdk.get_client().data_collection
if dc.explicit:
# Apply the configured request-header collection behavior (default
# denyList scrubs sensitive header values; the raw Cookie/Set-Cookie
# header is always filtered).
return filter_request_headers(
headers, dc.http_headers.request, substitute=substitute
)

# Legacy behavior (data_collection not set explicitly).
if should_send_default_pii():
return headers

return {
k: (v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS else substitute)
for k, v in headers.items()
}


def collect_query_string(
    raw_query_string: "Optional[str]",
) -> "Optional[str]":
    """
    Return the query string to attach to span attributes, or ``None``.

    The result is intended for ``http.query`` / ``url.query`` and the query
    portion of ``url.full``.

    Decision order:

    1. A missing or empty ``raw_query_string`` is never collected; ``None``
       is returned without consulting the client.
    2. When ``data_collection`` is set explicitly on the active client, the
       ``query_params`` behavior governs collection: the value returned by
       ``scrub_query_string`` is passed through unchanged (it may itself be
       ``None`` -- callers in the integrations check for that).
    3. Otherwise the legacy ``send_default_pii`` gate applies (preserving
       previous behavior): the raw query string is returned only when
       ``should_send_default_pii()`` is true.

    :param raw_query_string: The unmodified query string of the request, or
        ``None`` if the request has none.
    :returns: The (possibly scrubbed) query string, or ``None`` if it should
        not be collected.
    """
    # Nothing to collect; return before touching the client at all.
    if not raw_query_string:
        return None

    dc = sentry_sdk.get_client().data_collection
    if dc.explicit:
        # Explicit configuration wins over the legacy PII switch.
        return scrub_query_string(raw_query_string, dc.query_params)

    # Legacy behavior (data_collection not set explicitly).
    if should_send_default_pii():
        return raw_query_string
    return None


def should_collect_url() -> bool:
    """
    Decide whether non-query URL attributes should be collected.

    This covers the ``url.full`` base and ``url.path``. These never contain
    query strings, so they are treated as technical context.

    :returns: ``True`` whenever ``data_collection`` is set explicitly on the
        active client; otherwise the result of the legacy
        ``should_send_default_pii()`` gate.
    """
    dc = sentry_sdk.get_client().data_collection
    if dc.explicit:
        # Explicit configuration: URL base/path is always collected.
        return True
    # Legacy behavior (data_collection not set explicitly).
    return should_send_default_pii()


def _in_http_status_code_range(
code: object, code_ranges: "list[HttpStatusCodeRange]"
) -> bool:
Expand Down
40 changes: 30 additions & 10 deletions sentry_sdk/integrations/aiohttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sentry_sdk
from sentry_sdk.api import continue_trace
from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS
from sentry_sdk.data_collection import scrub_query_string
from sentry_sdk.integrations import (
_DEFAULT_FAILED_REQUEST_STATUS_CODES,
DidNotEnable,
Expand All @@ -13,10 +14,12 @@
)
from sentry_sdk.integrations._wsgi_common import (
_filter_headers,
collect_query_string,
request_body_within_bounds,
should_collect_url,
)
from sentry_sdk.integrations.logging import ignore_logger
from sentry_sdk.scope import Scope, should_send_default_pii
from sentry_sdk.scope import Scope, should_collect_user_info
from sentry_sdk.sessions import track_session
from sentry_sdk.traces import (
SOURCE_FOR_STYLE as SEGMENT_SOURCE_FOR_STYLE,
Expand Down Expand Up @@ -159,20 +162,21 @@ async def sentry_app_handle(
header_value
)

url_attributes = {}
if should_send_default_pii():
url_attributes: "dict[str, Any]" = {}
if should_collect_url():
url_attributes["url.full"] = "%s://%s%s" % (
request.scheme,
request.host,
request.path,
)
url_attributes["url.path"] = request.path

if request.query_string:
url_attributes["url.query"] = request.query_string
query = collect_query_string(request.query_string)
if query:
url_attributes["url.query"] = query

client_address_attributes = {}
if should_send_default_pii() and request.remote:
if should_collect_user_info() and request.remote:
client_address_attributes["client.address"] = request.remote
scope.set_attribute(
SPANDATA.USER_IP_ADDRESS, request.remote
Expand Down Expand Up @@ -358,15 +362,18 @@ async def on_request_start(
"sentry.origin": AioHttpIntegration.origin,
"http.request.method": method,
}
if parsed_url is not None and should_send_default_pii():
if parsed_url is not None and should_collect_url():
attributes["url.full"] = parsed_url.url
attributes["url.path"] = params.url.path

if parsed_url.query:
attributes["url.query"] = parsed_url.query
if parsed_url.fragment:
attributes["url.fragment"] = parsed_url.fragment

if parsed_url is not None:
query = collect_query_string(parsed_url.query)
if query:
attributes["url.query"] = query

span = sentry_sdk.traces.start_span(name=span_name, attributes=attributes)
else:
legacy_span = sentry_sdk.start_span(
Expand Down Expand Up @@ -458,7 +465,20 @@ def aiohttp_processor(
request.path,
)

request_info["query_string"] = request.query_string
# Event request.query_string is set unconditionally in legacy mode;
# when data_collection is explicit it is governed by query_params.
query_string = request.query_string
dc = sentry_sdk.get_client().data_collection
if dc.explicit:
scrubbed_qs = (
scrub_query_string(query_string, dc.query_params)
if query_string
else None
)
if scrubbed_qs is not None:
request_info["query_string"] = scrubbed_qs
else:
request_info["query_string"] = query_string
request_info["method"] = request.method
request_info["env"] = {"REMOTE_ADDR": request.remote}
request_info["headers"] = _filter_headers(dict(request.headers))
Expand Down
4 changes: 2 additions & 2 deletions sentry_sdk/integrations/asgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
DEFAULT_HTTP_METHODS_TO_CAPTURE,
nullcontext,
)
from sentry_sdk.scope import Scope, should_send_default_pii
from sentry_sdk.scope import Scope, should_collect_user_info
from sentry_sdk.sessions import track_session
from sentry_sdk.traces import (
SOURCE_FOR_STYLE as SEGMENT_SOURCE_FOR_STYLE,
Expand Down Expand Up @@ -248,7 +248,7 @@ async def _run_app(
"network.protocol.name": ty,
}

if scope.get("client") and should_send_default_pii():
if scope.get("client") and should_collect_user_info():
sentry_scope.set_attribute(
SPANDATA.USER_IP_ADDRESS, _get_ip(scope)
)
Expand Down
30 changes: 24 additions & 6 deletions sentry_sdk/integrations/aws_lambda.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
import sentry_sdk
from sentry_sdk.api import continue_trace
from sentry_sdk.consts import OP
from sentry_sdk.data_collection import COLLECTION_OFF, apply_key_value_collection
from sentry_sdk.integrations import Integration
from sentry_sdk.integrations._wsgi_common import _filter_headers
from sentry_sdk.integrations._wsgi_common import _filter_headers, collect_query_string
from sentry_sdk.integrations.cloud_resource_context import (
CLOUD_PLATFORM,
CLOUD_PROVIDER,
)
from sentry_sdk.scope import Scope, should_send_default_pii
from sentry_sdk.scope import (
Scope,
should_collect_user_info,
should_send_default_pii,
)
from sentry_sdk.traces import SegmentSource
from sentry_sdk.tracing import TransactionSource
from sentry_sdk.tracing_utils import has_span_streaming_enabled
Expand Down Expand Up @@ -164,10 +169,12 @@ def sentry_handler(
"httpMethod"
]

if should_send_default_pii() and "queryStringParameters" in request_data:
if "queryStringParameters" in request_data:
qs = request_data["queryStringParameters"]
if qs:
additional_attributes["url.query"] = urlencode(qs)
query_string = collect_query_string(urlencode(qs))
if query_string:
additional_attributes["url.query"] = query_string

sampling_context = {
"aws_event": aws_event,
Expand Down Expand Up @@ -409,12 +416,22 @@ def event_processor(
request["url"] = _get_url(aws_event, aws_context)

if "queryStringParameters" in aws_event:
request["query_string"] = aws_event["queryStringParameters"]
# Event request.query_string is set unconditionally in legacy mode;
# when data_collection is explicit it is governed by query_params.
qs = aws_event["queryStringParameters"]
dc = sentry_sdk.get_client().data_collection
if dc.explicit:
if qs and dc.query_params.mode != COLLECTION_OFF:
request["query_string"] = apply_key_value_collection(
qs, dc.query_params
)
else:
request["query_string"] = qs

if "headers" in aws_event:
request["headers"] = _filter_headers(aws_event["headers"])

if should_send_default_pii():
if should_collect_user_info():
user_info = sentry_event.setdefault("user", {})

identity = aws_event.get("identity")
Expand All @@ -429,6 +446,7 @@ def event_processor(
if ip is not None:
user_info.setdefault("ip_address", ip)

if should_send_default_pii():
if "body" in aws_event:
request["data"] = aws_event.get("body", "")
else:
Expand Down
Loading
Loading