diff --git a/aws/logs_monitoring/parsing.py b/aws/logs_monitoring/parsing.py index cd57a8e55..1aee8d4c3 100644 --- a/aws/logs_monitoring/parsing.py +++ b/aws/logs_monitoring/parsing.py @@ -63,6 +63,8 @@ re.I, ) +account_id_regex = re.compile("^\d{12}$") + # Store the cache in the global scope so that it will be reused as long as # the log forwarder Lambda container is running account_cw_logs_tags_cache = CloudwatchLogGroupTagsCache() @@ -335,6 +337,9 @@ def is_cloudtrail(key): match = cloudtrail_regex.search(key) return bool(match) +def is_account_id(key): + match = account_id_regex.search(key) + return bool(match) def find_s3_source(key): # e.g. AWSLogs/123456779121/elasticloadbalancing/us-east-1/2020/10/02/123456779121_elasticloadbalancing_us-east-1_app.alb.xxxxx.xx.xxx.xxx_x.log.gz @@ -429,7 +434,7 @@ def parse_service_arn(source, key, bucket, context): if source == "cloudfront": # For Cloudfront logs we need to get the account and distribution id from the lambda arn and the filename # 1. We extract the cloudfront id from the filename - # 2. We extract the AWS account id from the lambda arn + # 2. We extract the AWS account id from the s3 key prefix or from the lambda arn # 3. We build the arn namesplit = key.split("/") if len(namesplit) > 0: @@ -438,6 +443,12 @@ def parse_service_arn(source, key, bucket, context): filenamesplit = filename.split(".") if len(filenamesplit) > 3: distributionID = filenamesplit[len(filenamesplit) - 4].lower() + prefixsplit = namesplit[0:-1] + for prefixpart in prefixsplit: + if is_account_id(prefixpart): + return "arn:aws:cloudfront::{}:distribution/{}".format( + prefixpart, distributionID + ) arn = context.invoked_function_arn arnsplit = arn.split(":") if len(arnsplit) == 7: diff --git a/aws/logs_monitoring/tests/test_parsing.py b/aws/logs_monitoring/tests/test_parsing.py index 9b5228fe9..dcf47c501 100644 --- a/aws/logs_monitoring/tests/test_parsing.py +++ b/aws/logs_monitoring/tests/test_parsing.py @@ -5,6 +5,7 @@ import os import sys import unittest +from collections import namedtuple sys.modules["trace_forwarder.connection"] = MagicMock() sys.modules["datadog_lambda.wrapper"] = MagicMock() @@ -368,6 +369,30 @@ def test_elb_s3_key_multi_prefix(self): "arn:aws:elasticloadbalancing:us-east-1:123456789123:loadbalancer/app/my-alb-name/123456789aabcdef", ) + def test_cloudfront_s3_key_prefix(self): + Context = namedtuple("Context","invoked_function_arn") + self.assertEqual( + parse_service_arn( + "cloudfront", + "AWSLogs/cloudfront/123456789012/EMLARXS9EXAMPLE.2019-11-14-20.RT4KCN4SGK9.gz", + None, + Context("arn:aws:lambda:eu-central-1:123456789015:function:datadog-forwarder") + ), + "arn:aws:cloudfront::123456789012:distribution/emlarxs9example", + ) + + def test_cloudfront_s3_key_no_prefix(self): + Context = namedtuple("Context","invoked_function_arn") + self.assertEqual( + parse_service_arn( + "cloudfront", + "cloudfront/EMLARXS9EXAMPLE.2019-11-14-20.RT4KCN4SGK9.gz", + None, + Context("arn:aws:lambda:eu-central-1:123456789015:function:datadog-forwarder") + ), + "arn:aws:cloudfront::123456789015:distribution/emlarxs9example", + ) + def test_elb_s3_key_multi_prefix_gov(self): self.assertEqual( parse_service_arn(