diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py
index b3f79a96..5991c6bd 100644
--- a/datadog_lambda/tracing.py
+++ b/datadog_lambda/tracing.py
@@ -43,6 +43,7 @@
     is_step_function_event,
     EventTypes,
     EventSubtypes,
+    resolve_alb_request_headers,
 )
 from datadog_lambda.durable import extract_context_from_durable_execution
 
@@ -197,6 +198,11 @@ def extract_context_from_http_event_or_context(
             return context
 
     headers = event.get("headers")
+    if not isinstance(headers, dict) or not headers:
+        if isinstance(event.get("multiValueHeaders"), dict):
+            headers = resolve_alb_request_headers(event)
+        else:
+            headers = {}
     context = propagator.extract(headers)
 
     if not _is_context_complete(context):
@@ -658,7 +664,9 @@ def extract_dd_trace_context(
             context = extract_context_from_request_header_or_context(
                 event, lambda_context, event_source
             )
-        elif isinstance(event, (set, dict)) and "headers" in event:
+        elif isinstance(event, (set, dict)) and (
+            "headers" in event or "multiValueHeaders" in event
+        ):
             context = extract_context_from_http_event_or_context(
                 event, lambda_context, event_source, decode_authorizer_context
             )
@@ -837,6 +845,9 @@ def create_inferred_span(
         elif event_source.equals(EventTypes.LAMBDA_FUNCTION_URL):
             logger.debug("Function URL event detected. Inferring a span")
             return create_inferred_span_from_lambda_function_url_event(event, context)
+        elif event_source.event_type == EventTypes.ALB:
+            logger.debug("ALB event detected. Inferring a span")
+            return create_inferred_span_from_alb_event(event, context)
         elif event_source.equals(
             EventTypes.API_GATEWAY, subtype=EventSubtypes.HTTP_API
         ):
@@ -952,6 +963,60 @@ def create_inferred_span_from_lambda_function_url_event(event, context):
     return span
 
 
+def create_inferred_span_from_alb_event(event, context):
+    """
+    Create the inferred ``aws.alb`` span for an Application Load Balancer event.
+
+    Tags that cannot be derived from the event are omitted instead of being
+    set to ``None``. Returns the span produced by ``tracer.trace``.
+    """
+    request_context = event.get("requestContext") or {}
+    elb = request_context.get("elb") or {}
+    target_group_arn = elb.get("targetGroupArn")
+
+    headers = resolve_alb_request_headers(event)
+    host = headers.get("host")
+    method = event.get("httpMethod")
+    path = event.get("path")
+    proto = headers.get("x-forwarded-proto", "http")
+
+    # ALB has no api id; key the service mapping off the load-balancer host and
+    # fall back to it when DD_TRACE_AWS_SERVICE_REPRESENTATION_ENABLED is on.
+    service_name = determine_service_name(service_mapping, host, "lambda_alb", host)
+
+    http_url = "%s://%s%s" % (proto, host, path) if host and path is not None else None
+    if method and path is not None:
+        resource = f"{method} {path}"
+    else:
+        resource = method or path
+
+    tags = {
+        "operation_name": "aws.alb",
+        "span.kind": "server",
+        "http.method": method,
+        "http.url": http_url,
+        "http.useragent": headers.get("user-agent"),
+        "endpoint": path,
+        "resource_names": resource,
+        "request_id": context.aws_request_id,
+        "target_group_arn": target_group_arn,
+    }
+    # Drop tags we couldn't derive so the span never carries malformed values.
+    tags = {key: value for key, value in tags.items() if value is not None}
+
+    tracer.set_tags(_dd_origin)
+    # ALB events carry no request timestamp (unlike API GW requestTimeEpoch /
+    # Function URL timeEpoch), so the span starts at handler time.
+    span = tracer.trace(
+        "aws.alb", service=service_name, resource=resource, span_type="http"
+    )
+    InferredSpanInfo.set_tags(tags, tag_source="self", synchronicity="sync")
+    if span:
+        span.set_tags(tags)
+        span.set_metric(InferredSpanInfo.METRIC, 1.0)
+    return span
+
+
 def is_api_gateway_invocation_async(event):
     hdrs = event.get("headers")
     if not hdrs:
diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py
index e60de8f0..fa7994d9 100644
--- a/datadog_lambda/trigger.py
+++ b/datadog_lambda/trigger.py
@@ -293,6 +293,40 @@ def get_event_source_arn(source: _EventSource, event: dict, context: Any) -> str
     return event_source_arn
 
 
+def resolve_alb_request_headers(event):
+    """
+    Resolve ALB request headers from single-value ``headers`` or
+    ``multiValueHeaders`` (first value per key, matching datadog-lambda-js).
+    """
+    headers = event.get("headers")
+    if isinstance(headers, dict) and headers:
+        return headers
+
+    multi_value = event.get("multiValueHeaders")
+    if not isinstance(multi_value, dict):
+        return {}
+
+    resolved = {}
+    for key, value in multi_value.items():
+        if isinstance(value, list):
+            if value:
+                resolved[key] = value[0]
+        elif isinstance(value, str):
+            resolved[key] = value
+    return resolved
+
+
+def _get_header_case_insensitive(headers, name):
+    """Return the first truthy value whose key matches ``name`` ignoring case."""
+    if not isinstance(headers, dict):
+        return None
+    name_lower = name.lower()
+    for key, value in headers.items():
+        if isinstance(key, str) and key.lower() == name_lower and value:
+            return value
+    return None
+
+
 def extract_http_tags(event):
     """
     Extracts HTTP facet tags from the triggering event
@@ -306,6 +340,7 @@ def extract_http_tags(event):
 
     path = event.get("path")
     method = event.get("httpMethod")
+    request_headers = None
 
     if request_context and request_context.get("stage"):
         domain_name = request_context.get("domainName")
@@ -324,19 +359,40 @@ def extract_http_tags(event):
             path = apigateway_v2_http.get("path")
             method = apigateway_v2_http.get("method")
+    elif request_context and request_context.get("elb"):
+        # ALB events have no requestContext.stage; derive the URL from the
+        # forwarded host/proto headers and the top-level path. Header names
+        # are matched case-insensitively, like the referer lookup below.
+        request_headers = resolve_alb_request_headers(event)
+        host = _get_header_case_insensitive(request_headers, "host")
+        if host:
+            proto = (
+                _get_header_case_insensitive(request_headers, "x-forwarded-proto")
+                or "http"
+            )
+            http_tags["http.url"] = proto + "://" + host
+
+        user_agent = _get_header_case_insensitive(request_headers, "user-agent")
+        if user_agent:
+            http_tags["http.useragent"] = user_agent
+
+        # ALB carries no route template, so use the request path as the route.
+        if path:
+            http_tags["http.route"] = path
 
     if path:
         if http_tags.get("http.url"):
             http_tags["http.url"] += path
     if method:
         http_tags["http.method"] = method
 
-    # Safely get headers
-    headers = event.get("headers", {})
-    if not isinstance(headers, dict):
-        headers = {}
+    if request_headers is None:
+        request_headers = event.get("headers")
+    if not isinstance(request_headers, dict):
+        request_headers = {}
 
-    if headers and headers.get("Referer"):
-        http_tags["http.referer"] = headers.get("Referer")
+    referer = _get_header_case_insensitive(request_headers, "referer")
+    if referer:
+        http_tags["http.referer"] = referer
 
     # Try to get `routeKey` from API GW v2; otherwise try to get `resource` from API GW v1
     route = event.get("routeKey") or event.get("resource")
diff --git a/tests/test_tracing.py b/tests/test_tracing.py
index fc18f6e5..7146d967 100644
--- a/tests/test_tracing.py
+++ b/tests/test_tracing.py
@@ -112,6 +112,10 @@ def _wrap(*args, **kwargs):
         "api-gateway-websocket-disconnect",
         Context(trace_id=12345, span_id=67890, sampling_priority=2),
     ),
+    (
+        "application-load-balancer",
+        Context(trace_id=12345, span_id=67890, sampling_priority=2),
+    ),
     (
         "authorizer-request-api-gateway-v1",
         Context(
@@ -1953,6 +1957,178 @@ def test_remaps_specific_inferred_span_service_names_from_eventbridge_event(
         self.assertEqual(span2.get_tag("operation_name"), "aws.eventbridge")
         self.assertEqual(span2.service, "different.eventbridge.custom.event.sender")
 
+    def test_remaps_all_inferred_span_service_names_from_alb_event(self):
+        self.set_service_mapping({"lambda_alb": "new-name"})
+        with open(f"{event_samples}application-load-balancer.json") as event:
+            original_event = json.load(event)
+
+        ctx = get_mock_context()
+        ctx.aws_request_id = "123"
+
+        span1 = create_inferred_span(original_event, ctx)
+        self.assertEqual(span1.get_tag("operation_name"), "aws.alb")
+        self.assertEqual(span1.service, "new-name")
+
+        event2 = copy.deepcopy(original_event)
+        event2["headers"]["host"] = "different-alb.us-east-2.elb.amazonaws.com"
+        span2 = create_inferred_span(event2, ctx)
+        self.assertEqual(span2.get_tag("operation_name"), "aws.alb")
+        self.assertEqual(span2.service, "new-name")
+
+    def test_remaps_specific_inferred_span_service_names_from_alb_event(self):
+        host = "lambda-alb-123578498.us-east-2.elb.amazonaws.com"
+        self.set_service_mapping({host: "mapped-alb-service"})
+        with open(f"{event_samples}application-load-balancer.json") as event:
+            original_event = json.load(event)
+
+        ctx = get_mock_context()
+        ctx.aws_request_id = "123"
+
+        span1 = create_inferred_span(original_event, ctx)
+        self.assertEqual(span1.get_tag("operation_name"), "aws.alb")
+        self.assertEqual(span1.service, "mapped-alb-service")
+
+        event2 = copy.deepcopy(original_event)
+        event2["headers"]["host"] = "other-alb.us-east-2.elb.amazonaws.com"
+        span2 = create_inferred_span(event2, ctx)
+        self.assertEqual(span2.get_tag("operation_name"), "aws.alb")
+        self.assertEqual(span2.service, "other-alb.us-east-2.elb.amazonaws.com")
+
+
+class TestAlbInferredSpan(unittest.TestCase):
+    ALB_SAMPLE = "application-load-balancer"
+    ALB_MULTIVALUE = "application-load-balancer-multivalue-headers"
+    ALB_HOST = "lambda-alb-123578498.us-east-2.elb.amazonaws.com"
+    ALB_USER_AGENT = (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+        "(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
+    )
+
+    def _load_event(self, sample_name):
+        with open(f"{event_samples}{sample_name}.json") as event_file:
+            return json.load(event_file)
+
+    def test_create_inferred_span_from_alb_event(self):
+        event = self._load_event(self.ALB_SAMPLE)
+        ctx = get_mock_context(aws_request_id="123")
+
+        span = create_inferred_span(event, ctx)
+
+        self.assertIsNotNone(span)
+        self.assertEqual(span.name, "aws.alb")
+        self.assertEqual(span.span_type, "http")
+        self.assertEqual(span.service, self.ALB_HOST)
+        self.assertEqual(span.resource, "GET /lambda")
+        self.assertEqual(span.get_tag("operation_name"), "aws.alb")
+        self.assertEqual(span.get_tag("span.kind"), "server")
+        self.assertEqual(span.get_tag("http.method"), "GET")
+        self.assertEqual(
+            span.get_tag("http.url"),
+            "http://%s/lambda" % self.ALB_HOST,
+        )
+        self.assertEqual(span.get_tag("http.useragent"), self.ALB_USER_AGENT)
+        self.assertEqual(span.get_tag("endpoint"), "/lambda")
+        self.assertEqual(span.get_tag("resource_names"), "GET /lambda")
+        self.assertEqual(span.get_tag("request_id"), "123")
+        self.assertEqual(span.get_tag("_inferred_span.synchronicity"), "sync")
+        self.assertEqual(span.get_tag("_inferred_span.tag_source"), "self")
+        self.assertEqual(span.get_metric("_dd._inferred_span"), 1.0)
+        self.assertEqual(
+            span.get_tag("target_group_arn"),
+            "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/lambda-xyz/123abc",
+        )
+
+    def test_create_inferred_span_omits_tags_when_headers_missing(self):
+        event = self._load_event(self.ALB_SAMPLE)
+        del event["headers"]
+        event["httpMethod"] = None
+        event["path"] = None
+
+        span = create_inferred_span(event, get_mock_context())
+
+        self.assertIsNotNone(span)
+        self.assertNotIn("http.url", span.get_tags())
+        self.assertNotIn("http.method", span.get_tags())
+        self.assertNotIn("http.useragent", span.get_tags())
+
+    def test_multivalue_headers_subtype_emits_inferred_span(self):
+        event = self._load_event(self.ALB_MULTIVALUE)
+        span = create_inferred_span(event, get_mock_context())
+        self.assertIsNotNone(span)
+        self.assertEqual(span.name, "aws.alb")
+        self.assertEqual(span.get_tag("http.method"), "GET")
+        self.assertEqual(
+            span.get_tag("http.url"),
+            "http://%s/lambda" % self.ALB_HOST,
+        )
+        self.assertEqual(span.get_tag("http.useragent"), self.ALB_USER_AGENT)
+
+    @with_trace_propagation_style("datadog")
+    def test_inbound_datadog_context_from_multivalue_headers(self):
+        event = self._load_event(self.ALB_MULTIVALUE)
+        ctx = get_mock_context()
+
+        parent_ctx, source, _ = extract_dd_trace_context(event, ctx)
+        self.assertIsNotNone(parent_ctx)
+        self.assertEqual(parent_ctx.trace_id, 12345)
+        self.assertEqual(parent_ctx.span_id, 67890)
+
+        set_dd_trace_py_root(source, merge_xray_traces=False)
+        span = create_inferred_span(event, ctx)
+        self.assertEqual(span.trace_id, parent_ctx.trace_id)
+        self.assertEqual(span.parent_id, parent_ctx.span_id)
+
+    @with_trace_propagation_style("datadog")
+    def test_inbound_datadog_context_parents_inferred_span(self):
+        event = self._load_event(self.ALB_SAMPLE)
+        ctx = get_mock_context()
+
+        parent_ctx, source, _ = extract_dd_trace_context(event, ctx)
+        set_dd_trace_py_root(source, merge_xray_traces=False)
+        span = create_inferred_span(event, ctx)
+
+        self.assertEqual(span.trace_id, parent_ctx.trace_id)
+        self.assertEqual(span.parent_id, parent_ctx.span_id)
+
+    def test_inbound_w3c_context_extracted_from_alb_event(self):
+        event = self._load_event(self.ALB_SAMPLE)
+        event["headers"] = {
+            "host": self.ALB_HOST,
+            "user-agent": self.ALB_USER_AGENT,
+            "x-forwarded-proto": "http",
+            "traceparent": "00-0000000000000000000000000000abcd-000000000000004d-01",
+            "tracestate": "dd=s:1",
+        }
+
+        ctx, source, _ = extract_dd_trace_context(event, get_mock_context())
+
+        self.assertIsNotNone(ctx)
+        self.assertEqual(source, TraceContextSource.EVENT)
+        self.assertEqual(ctx.trace_id, 0xABCD)
+        self.assertEqual(ctx.span_id, 0x4D)
+
+    def test_http_url_uses_https_when_forwarded_proto_is_https(self):
+        event = self._load_event(self.ALB_SAMPLE)
+        event["headers"]["x-forwarded-proto"] = "https"
+
+        span = create_inferred_span(event, get_mock_context())
+
+        self.assertEqual(
+            span.get_tag("http.url"),
+            "https://%s/lambda" % self.ALB_HOST,
+        )
+
+    def test_http_url_excludes_query_string(self):
+        event = self._load_event(self.ALB_SAMPLE)
+
+        span = create_inferred_span(event, get_mock_context())
+
+        self.assertEqual(
+            span.get_tag("http.url"),
+            "http://%s/lambda" % self.ALB_HOST,
+        )
+        self.assertNotIn("query=", span.get_tag("http.url") or "")
+
 
 class _Span(object):
     def __init__(self, service, start, span_type, parent_name=None, tags=None):
diff --git a/tests/test_trigger.py b/tests/test_trigger.py
index f10fcbbf..477aeec1 100644
--- a/tests/test_trigger.py
+++ b/tests/test_trigger.py
@@ -427,10 +427,65 @@ def test_extract_trigger_tags_application_load_balancer(self):
                 "function_trigger.event_source": "application-load-balancer",
                 "function_trigger.event_source_arn": "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/lambda-xyz/123abc",
                 "http.method": "GET",
+                "http.url": "http://lambda-alb-123578498.us-east-2.elb.amazonaws.com/lambda",
+                "http.route": "/lambda",
+                "http.useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
                 "span.kind": "server",
             },
         )
 
+    def test_extract_trigger_tags_application_load_balancer_multivalue_headers(self):
+        event_sample_source = "application-load-balancer-multivalue-headers"
+        test_file = event_samples + event_sample_source + ".json"
+        ctx = get_mock_context()
+        with open(test_file, "r") as event:
+            event = json.load(event)
+        tags = extract_trigger_tags(event, ctx)
+
+        assert tags.get("function_trigger.event_source") == "application-load-balancer"
+        assert tags.get("http.method") == "GET"
+        assert tags.get("http.route") == "/lambda"
+        assert tags.get("http.url") == (
+            "http://lambda-alb-123578498.us-east-2.elb.amazonaws.com/lambda"
+        )
+        assert tags.get("http.useragent").startswith("Mozilla/5.0")
+
+    def test_extract_trigger_tags_alb_referer_from_lowercase_headers(self):
+        event_sample_source = "application-load-balancer"
+        with open(event_samples + event_sample_source + ".json") as event_file:
+            event = json.load(event_file)
+        event["headers"]["referer"] = "https://example.com/page"
+
+        tags = extract_trigger_tags(event, get_mock_context())
+
+        self.assertEqual(tags.get("http.referer"), "https://example.com/page")
+
+    def test_extract_trigger_tags_alb_referer_from_multivalue_headers(self):
+        event_sample_source = "application-load-balancer-multivalue-headers"
+        with open(event_samples + event_sample_source + ".json") as event_file:
+            event = json.load(event_file)
+        event["multiValueHeaders"]["referer"] = ["https://example.com/page"]
+
+        tags = extract_trigger_tags(event, get_mock_context())
+
+        self.assertEqual(tags.get("http.referer"), "https://example.com/page")
+
+    def test_extract_trigger_tags_alb_mixed_case_header_names(self):
+        event_sample_source = "application-load-balancer"
+        with open(event_samples + event_sample_source + ".json") as event_file:
+            event = json.load(event_file)
+        headers = event["headers"]
+        headers["Host"] = headers.pop("host")
+        headers["User-Agent"] = headers.pop("user-agent")
+
+        tags = extract_trigger_tags(event, get_mock_context())
+
+        self.assertEqual(
+            tags.get("http.url"),
+            "http://lambda-alb-123578498.us-east-2.elb.amazonaws.com/lambda",
+        )
+        self.assertTrue(tags.get("http.useragent"))
+
     def test_extract_trigger_tags_cloudfront(self):
         event_sample_source = "cloudfront"
         test_file = event_samples + event_sample_source + ".json"
@@ -617,6 +672,19 @@ def test_extract_http_tags_with_invalid_headers(self):
         # Should not raise an exception
         self.assertEqual(http_tags, {"span.kind": "server"})
 
+    def test_extract_http_tags_referer_case_insensitive(self):
+        from datadog_lambda.trigger import extract_http_tags
+
+        event = {"headers": {"Referer": "https://example.com/capitalized"}}
+        http_tags = extract_http_tags(event)
+        self.assertEqual(
+            http_tags.get("http.referer"), "https://example.com/capitalized"
+        )
+
+        event = {"headers": {"referer": "https://example.com/lowercase"}}
+        http_tags = extract_http_tags(event)
+        self.assertEqual(http_tags.get("http.referer"), "https://example.com/lowercase")
+
     def test_extract_http_tags_with_invalid_route(self):
         from datadog_lambda.trigger import extract_http_tags
 
diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
index 87edd322..26a53b0c 100644
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -1001,3 +1001,153 @@ def test_profiling_import_errors_caught(monkeypatch):
     )  # force ModuleNotFoundError
     importlib.reload(wrapper)
     assert not hasattr(wrapper.datadog_lambda_wrapper, "prof")
+
+
+class TestAlbInferredSpanWrapper(unittest.TestCase):
+    """End-to-end wrapper tests for the inferred aws.alb span (FRSLES-851)."""
+
+    def setUp(self):
+        patcher = patch("ddtrace.internal.remoteconfig.worker.RemoteConfigPoller")
+        patcher.start()
+        self.addCleanup(patcher.stop)
+        wrapper.datadog_lambda_wrapper._force_wrap = True
+
+        patcher = patch(
+            "datadog.threadstats.reporters.HttpReporter.flush_distributions"
+        )
+        self.mock_flush_distributions = patcher.start()
+        self.addCleanup(patcher.stop)
+
+        patcher = patch("datadog_lambda.xray.send_segment")
+        self.mock_send_segment = patcher.start()
+        self.addCleanup(patcher.stop)
+
+        patcher = patch("datadog_lambda.wrapper.create_dd_dummy_metadata_subsegment")
+        self.mock_create_dd_dummy_metadata_subsegment = patcher.start()
+        self.addCleanup(patcher.stop)
+
+        with open("tests/event_samples/application-load-balancer.json") as f:
+            self.alb_event = json.load(f)
+
+    def _alb_response(self, status_code=200):
+        return {
+            "statusCode": status_code,
+            "statusDescription": f"{status_code} OK",
+            "headers": {"Content-Type": "application/json"},
+            "body": "{}",
+            "isBase64Encoded": False,
+        }
+
+    @patch("datadog_lambda.config.Config.trace_enabled", True)
+    @patch("datadog_lambda.config.Config.make_inferred_span", True)
+    def test_wrapper_emits_inferred_alb_span_with_http_tags(self):
+        @wrapper.datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            return self._alb_response(200)
+
+        lambda_handler(self.alb_event, get_mock_context())
+
+        inferred = lambda_handler.inferred_span
+        execution = lambda_handler.span
+
+        self.assertIsNotNone(inferred)
+        self.assertEqual(inferred.name, "aws.alb")
+        self.assertEqual(inferred.get_tag("operation_name"), "aws.alb")
+        self.assertEqual(inferred.get_tag("http.method"), "GET")
+        self.assertEqual(
+            inferred.get_tag("http.url"),
+            "http://lambda-alb-123578498.us-east-2.elb.amazonaws.com/lambda",
+        )
+        self.assertEqual(inferred.get_tag("http.status_code"), "200")
+        self.assertEqual(inferred.get_tag("http.route"), "/lambda")
+        self.assertEqual(execution.parent_id, inferred.span_id)
+
+    @patch("datadog_lambda.config.Config.trace_enabled", True)
+    @patch("datadog_lambda.config.Config.make_inferred_span", True)
+    @patch("datadog_lambda.config.Config.service", "alb-demo-downstream")
+    def test_wrapper_sets_peer_service_and_dd_resource_key(self):
+        @wrapper.datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            return self._alb_response(200)
+
+        lambda_handler(self.alb_event, get_mock_context())
+
+        inferred = lambda_handler.inferred_span
+
+        self.assertEqual(inferred.get_tag("peer.service"), "alb-demo-downstream")
+        self.assertEqual(
+            inferred.get_tag("dd_resource_key"),
+            "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/lambda-xyz/123abc",
+        )
+
+    @patch("datadog_lambda.config.Config.trace_enabled", True)
+    @patch("datadog_lambda.config.Config.make_inferred_span", False)
+    def test_wrapper_skips_inferred_alb_span_when_disabled(self):
+        @wrapper.datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            return self._alb_response(200)
+
+        lambda_handler(self.alb_event, get_mock_context())
+
+        self.assertIsNone(lambda_handler.inferred_span)
+        self.assertIsNotNone(lambda_handler.span)
+
+    @patch("datadog_lambda.config.Config.trace_enabled", True)
+    @patch("datadog_lambda.config.Config.make_inferred_span", True)
+    def test_wrapper_inferred_alb_span_joins_inbound_datadog_context(self):
+        @wrapper.datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            return self._alb_response(200)
+
+        lambda_handler(self.alb_event, get_mock_context())
+
+        inferred = lambda_handler.inferred_span
+
+        self.assertIsNotNone(inferred)
+        # Fixture carries x-datadog-trace-id=12345, x-datadog-parent-id=67890
+        self.assertEqual(inferred.trace_id, 12345)
+        self.assertEqual(inferred.parent_id, 67890)
+
+    @patch("datadog_lambda.config.Config.trace_enabled", True)
+    @patch("datadog_lambda.config.Config.make_inferred_span", True)
+    def test_wrapper_sets_error_on_inferred_alb_span_for_5xx(self):
+        @wrapper.datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            return self._alb_response(502)
+
+        lambda_handler(self.alb_event, get_mock_context())
+
+        inferred = lambda_handler.inferred_span
+        execution = lambda_handler.span
+
+        self.assertEqual(inferred.get_tag("http.status_code"), "502")
+        self.assertEqual(execution.get_tag("http.status_code"), "502")
+        self.assertEqual(execution.error, 1)
+
+    @patch("datadog_lambda.config.Config.trace_enabled", True)
+    @patch("datadog_lambda.config.Config.make_inferred_span", True)
+    def test_wrapper_emits_inferred_alb_span_for_multivalue_headers(self):
+        with open(
+            "tests/event_samples/application-load-balancer-multivalue-headers.json"
+        ) as f:
+            event = json.load(f)
+
+        @wrapper.datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            return self._alb_response(200)
+
+        lambda_handler(event, get_mock_context())
+
+        inferred = lambda_handler.inferred_span
+        execution = lambda_handler.span
+
+        self.assertIsNotNone(inferred)
+        self.assertEqual(inferred.name, "aws.alb")
+        self.assertEqual(inferred.get_tag("http.method"), "GET")
+        self.assertEqual(
+            inferred.get_tag("http.url"),
+            "http://lambda-alb-123578498.us-east-2.elb.amazonaws.com/lambda",
+        )
+        self.assertEqual(inferred.get_tag("http.status_code"), "200")
+        self.assertEqual(inferred.get_tag("http.route"), "/lambda")
+        self.assertEqual(execution.parent_id, inferred.span_id)