From dc925cad65d81904b1ec5d11f76b1ae76475f0c5 Mon Sep 17 00:00:00 2001 From: Tomasz Wojtun Date: Tue, 10 May 2022 19:29:09 +0200 Subject: [PATCH] allow setting account id for cloudfront logs in prefix --- aws/logs_monitoring/parsing.py | 13 +++++++++++- aws/logs_monitoring/tests/test_parsing.py | 24 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/aws/logs_monitoring/parsing.py b/aws/logs_monitoring/parsing.py index cbf8af63d..4e8c65898 100644 --- a/aws/logs_monitoring/parsing.py +++ b/aws/logs_monitoring/parsing.py @@ -60,6 +60,8 @@ "\d+_CloudTrail_\w{2}-\w{4,9}-\d_\d{8}T\d{4}Z.+.json.gz$", re.I ) +account_id_regex = re.compile("^\d{12}$") + # Store the cache in the global scope so that it will be reused as long as # the log forwarder Lambda container is running account_cw_logs_tags_cache = CloudwatchLogGroupTagsCache() @@ -321,6 +323,9 @@ def is_cloudtrail(key): match = cloudtrail_regex.search(key) return bool(match) +def is_account_id(key): + match = account_id_regex.search(key) + return bool(match) def find_s3_source(key): # e.g. AWSLogs/123456779121/elasticloadbalancing/us-east-1/2020/10/02/123456779121_elasticloadbalancing_us-east-1_app.alb.xxxxx.xx.xxx.xxx_x.log.gz @@ -404,7 +409,7 @@ def parse_service_arn(source, key, bucket, context): if source == "cloudfront": # For Cloudfront logs we need to get the account and distribution id from the lambda arn and the filename # 1. We extract the cloudfront id from the filename - # 2. We extract the AWS account id from the lambda arn + # 2. We extract the AWS account id from the s3 key prefix or from the lambda arn # 3. We build the arn namesplit = key.split("/") if len(namesplit) > 0: @@ -413,6 +418,12 @@ def parse_service_arn(source, key, bucket, context): filenamesplit = filename.split(".") if len(filenamesplit) > 3: distributionID = filenamesplit[len(filenamesplit) - 4].lower() + prefixsplit = namesplit[0:-1] + for prefixpart in prefixsplit: + if is_account_id(prefixpart): + return "arn:aws:cloudfront::{}:distribution/{}".format( + prefixpart, distributionID + ) arn = context.invoked_function_arn arnsplit = arn.split(":") if len(arnsplit) == 7: diff --git a/aws/logs_monitoring/tests/test_parsing.py b/aws/logs_monitoring/tests/test_parsing.py index af45ac6a1..a7b615f99 100644 --- a/aws/logs_monitoring/tests/test_parsing.py +++ b/aws/logs_monitoring/tests/test_parsing.py @@ -5,6 +5,7 @@ import os import sys import unittest +from collections import namedtuple sys.modules["trace_forwarder.connection"] = MagicMock() sys.modules["datadog_lambda.wrapper"] = MagicMock() @@ -331,6 +332,29 @@ def test_elb_s3_key_multi_prefix(self): "arn:aws:elasticloadbalancing:us-east-1:123456789123:loadbalancer/app/my-alb-name/123456789aabcdef", ) + def test_cloudfront_s3_key_prefix(self): + Context = namedtuple("Context","invoked_function_arn") + self.assertEqual( + parse_service_arn( + "cloudfront", + "AWSLogs/cloudfront/123456789012/EMLARXS9EXAMPLE.2019-11-14-20.RT4KCN4SGK9.gz", + None, + Context("arn:aws:lambda:eu-central-1:123456789015:function:datadog-forwarder") + ), + "arn:aws:cloudfront::123456789012:distribution/emlarxs9example", + ) + + def test_cloudfront_s3_key_no_prefix(self): + Context = namedtuple("Context","invoked_function_arn") + self.assertEqual( + parse_service_arn( + "cloudfront", + "cloudfront/EMLARXS9EXAMPLE.2019-11-14-20.RT4KCN4SGK9.gz", + None, + Context("arn:aws:lambda:eu-central-1:123456789015:function:datadog-forwarder") + ), + "arn:aws:cloudfront::123456789015:distribution/emlarxs9example", + ) class TestParseAwsWafLogs(unittest.TestCase): def test_waf_string_invalid_json(self):