From 6bd0c9d8ccf9473caf666a9d496e5862a422c86d Mon Sep 17 00:00:00 2001
From: Sunny
Date: Thu, 29 May 2025 13:41:01 +0100
Subject: [PATCH 1/2] updated aws_admin_tools to include cost explorer data
 and added placeholder code for infra data collection

---
 roles/aws/aws_admin_tools/defaults/main.yml  |  39 +
 .../templates/api_get_cost_explorer_data.j2  | 359 ++++++++
 .../schedule_save_infra_data_to_s3.j2        | 792 ++++++++++++++++++
 3 files changed, 1190 insertions(+)
 create mode 100644 roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.j2
 create mode 100644 roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.j2

diff --git a/roles/aws/aws_admin_tools/defaults/main.yml b/roles/aws/aws_admin_tools/defaults/main.yml
index f2c46b7d4..63f558e2f 100644
--- a/roles/aws/aws_admin_tools/defaults/main.yml
+++ b/roles/aws/aws_admin_tools/defaults/main.yml
@@ -45,3 +45,42 @@ aws_admin_tools:
       resource: "*"
       action:
         - "wafv2:GetWebACL"
+  - name: "get_cost_explorer_data"
+    type: GET
+    policies: []
+    inline_policies:
+      name: "get_cost_explorer_data"
+      resource: "*"
+      action:
+        - "lambda:InvokeFunction"
+        - "ce:GetCostAndUsage"
+  - name: "save_infra_data_to_s3"
+    schedule:
+      cron: "cron(0 6,12 ? * MON-FRI *)"
+      timeout: 300
+    policies: []
+    inline_policies:
+      name: "save_infra_data_to_s3"
+      resource: "*"
+      action:
+        - "logs:CreateLogGroup"
+        - "logs:CreateLogStream"
+        - "logs:PutLogEvents"
+        - "ec2:Describe*"
+        - "rds:Describe*"
+        - "elasticloadbalancing:Describe*"
+        - "cloudfront:ListDistributions"
+        - "s3:List*"
+        - "s3:Get*"
+        - "autoscaling:Describe*"
+        - "sts:GetCallerIdentity"
+        - "s3:PutObject"
+  - name: "get_infra_data_from_s3" # This is the bit that sets up the API Gateway to pull and share infra data over API. No Lambda function needed here though. How do we do this? See docs
+    type: GET
+    policies: []
+    inline_policies:
+      name: "get_infra_data_from_s3"
+      resource: "*"
+      action:
+        - "lambda:InvokeFunction" # ??
+        - "ce:GetCostAndUsage" # ??
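[Editor's note] The '# ??' actions above are still Lambda-style placeholders. If get_infra_data_from_s3 is meant to be a Lambda-less API Gateway endpoint serving the JSON written by save_infra_data_to_s3, one possible wiring is an AWS-service (S3 proxy) integration. A minimal boto3 sketch, not part of this patch; the REST API ID, resource ID, role ARN, bucket and region are hypothetical, and the object key mirrors the latest_infra.json path used by the collection template:

    import boto3

    apigw = boto3.client("apigateway", region_name="eu-west-2")  # assumed region

    rest_api_id = "abc123"   # hypothetical REST API ID
    resource_id = "def456"   # hypothetical resource ID for the infra-data path
    read_role_arn = "arn:aws:iam::111111111111:role/apigw-s3-read"  # needs s3:GetObject on the bucket

    # GET method with no Lambda behind it.
    apigw.put_method(
        restApiId=rest_api_id,
        resourceId=resource_id,
        httpMethod="GET",
        authorizationType="NONE",
    )

    # Direct AWS-service integration to the object written by save_infra_data_to_s3.
    apigw.put_integration(
        restApiId=rest_api_id,
        resourceId=resource_id,
        httpMethod="GET",
        type="AWS",                  # service integration, not AWS_PROXY (Lambda)
        integrationHttpMethod="GET",
        uri="arn:aws:apigateway:eu-west-2:s3:path/example-infra-bucket/infra-data/111111111111/latest_infra.json",
        credentials=read_role_arn,
    )

If this route is taken, the two '# ??' actions would likely reduce to s3:GetObject on the data bucket for the integration role rather than lambda:InvokeFunction / ce:GetCostAndUsage.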
\ No newline at end of file diff --git a/roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.j2 b/roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.j2 new file mode 100644 index 000000000..60d85b8ed --- /dev/null +++ b/roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.j2 @@ -0,0 +1,359 @@ +import json +import boto3 +import datetime +from typing import Dict, List, Optional, Any +import time + +# Ansible injected region variable for client-specific region (overridden via Ansible templating) +ANSIBLE_REGION = "{{ _aws_region }}" + +# Mapping of AWS service names to friendly names +SERVICE_NAME_MAPPING: Dict[str, str] = { + "APN Annual Program Fee": "APN Fee", + "AWS AppSync": "AppSync", + "AWS Backup": "AWS Backup", + "AWS CloudFormation": "CloudFormation", + "AWS CloudShell": "CloudShell", + "AWS CloudTrail": "CloudTrail", + "AWS CodeBuild": "CodeBuild", + "AWS Comprehend": "Comprehend", + "AWS Config": "Config", + "AWS Cost Explorer": "Cost Explorer", + "AWS Glue": "Glue", + "AWS GuardDuty": "GuardDuty", + "AWS Inspector": "Inspector", + "AWS Key Management Service": "KMS", + "AWS Lambda": "Lambda", + "AWS Location": "Location Service", + "AWS Macie": "Macie", + "AWS Secrets Manager": "Secrets Manager", + "AWS Security Hub": "Security Hub", + "AWS Step Functions": "Step Functions", + "AWS Support (Business)": "Support (Business)", + "AWS Support (Developer)": "Support (Developer)", + "AWS Systems Manager": "Systems Manager", + "AWS X-Ray": "X-Ray", + "AWS WAF": "WAF", + "Amazon AppSync": "AppSync", + "Amazon Bedrock": "Bedrock", + "Amazon CloudFront": "CloudFront", + "Amazon CloudSearch": "CloudSearch", + "Amazon CloudWatch": "CloudWatch", + "Amazon Cognito": "Cognito", + "Amazon DynamoDB": "DynamoDB", + "Amazon EC2 Container Registry (ECR)": "EC2 Container Registry", + "Amazon ElastiCache": "ElastiCache", + "Amazon Elastic Compute Cloud - Compute": "EC2-Instances", + "Amazon Elastic Container Service": "ECS", + "Amazon Elastic Container Service for Kubernetes": "ECS for K8", + "Amazon Elastic File System": "EFS", + "Amazon Elastic Load Balancing": "ELB", + "Amazon Glacier": "Glacier", + "Amazon GuardDuty": "GuardDuty", + "Amazon Inspector": "Inspector", + "Amazon Kinesis": "Kinesis", + "Amazon Lightsail": "Lightsail", + "Amazon Location Service": "Location Service", + "Amazon OpenSearch Service": "OpenSearch", + "Amazon Polly": "Polly", + "Amazon Registrar": "Registrar", + "Amazon Relational Database Service": "RDS", + "Amazon Route 53": "Route 53", + "Amazon S3": "S3", + "Amazon SES": "SES", + "Amazon SNS": "SNS", + "Amazon SQS": "SQS", + "Amazon Simple Email Service": "SES", + "Amazon Simple Notification Service": "SNS", + "Amazon Simple Queue Service": "SQS", + "Amazon Simple Storage Service": "S3", + "Amazon Simple Workflow Service": "Simple Workflow Service", + "Amazon Systems Manager": "Systems Manager", + "Amazon Textract": "Textract", + "Amazon Translate": "Translate", + "Amazon Virtual Private Cloud": "VPC", + "Amazon WAF": "WAF", + "AmazonCloudWatch": "CloudWatch", + "Claude (Amazon Bedrock Edition)": "Claude (Bedrock)", + "CloudFront Security Bundle": "CF Security Bundle", + "CloudWatch Events": "CloudWatch Events", + "Cyber Security Cloud Managed Rules for AWS WAF": "Cyber Security Cloud Managed Rules for AWS WAF -HighSecurity OWASP Set-", + "EC2 - Other": "EC2-Other", + "Savings Plans for AWS Compute usage": "Savings Plans for AWS Compute", + "Tax": "Tax" +} + +# Initialize Cost Explorer client with ansible region override +ce_client = 
boto3.client('ce', region_name=ANSIBLE_REGION) + +def calculate_date_range(months: int = 2) -> Dict[str, str]: + """ + Calculate start and end dates for the given number of months. + Default is 2 months (previous full month + current month partial). + """ + today = datetime.date.today() + first_current_month = today.replace(day=1) + + # Calculate start date (first day of N months ago) + if months <= 1: + # If months is 1 or less, start from the beginning of current month + start_date = first_current_month + else: + # Start from the beginning of (months-1) months ago + month_offset = months - 1 + year_offset = month_offset // 12 + month_remainder = month_offset % 12 + + start_month = first_current_month.month - month_remainder + start_year = first_current_month.year - year_offset + + if start_month <= 0: + start_month += 12 + start_year -= 1 + + start_date = datetime.date(start_year, start_month, 1) + + return { + 'Start': start_date.isoformat(), + 'End': today.isoformat() + } + +def process_cost_explorer_response(response: Dict[str, Any]) -> Dict[str, Any]: + """ + Process the AWS Cost Explorer response into a more user-friendly format. + """ + results = response.get("ResultsByTime", []) + if not results: + return {"error": "No cost data found"} + + # Initialize data structures + time_periods = [] + + # Current date to identify partial months + current_date = datetime.date.today() + current_month_start = current_date.replace(day=1) + current_month_iso = current_month_start.isoformat() + + # Tracking for averages + full_months_data = [] + all_services = set() + + # Process each time period + for result in results: + time_period = result.get("TimePeriod", {}) + start = time_period.get("Start", "") + end = time_period.get("End", "") + + # Initialize period data + period_data = { + "start": start, + "end": end, + "services": {}, + "total": 0.0, + "is_full_month": True if start != current_month_iso else False + } + + # Process each service group in this period + for group in result.get("Groups", []): + raw_service_name = group.get("Keys", [""])[0] + friendly_name = SERVICE_NAME_MAPPING.get(raw_service_name, raw_service_name) + all_services.add(friendly_name) + + try: + amount = float(group.get("Metrics", {}).get("UnblendedCost", {}).get("Amount", "0")) + except ValueError: + amount = 0.0 + + period_data["services"][friendly_name] = round(amount, 2) + period_data["total"] += amount + + period_data["total"] = round(period_data["total"], 2) + + # Store full month data for average calculation + if period_data["is_full_month"]: + full_months_data.append(period_data) + + # Remove the is_full_month flag before adding to time_periods + is_full = period_data.pop("is_full_month", False) + time_periods.append(period_data) + + # Calculate averages from full months only + averages = {"services": {}, "total": 0.0, "months_included": len(full_months_data)} + + if full_months_data: + # Initialize all services to 0 + for service in all_services: + averages["services"][service] = 0.0 + + # Sum up the costs for each service + for period in full_months_data: + for service, amount in period["services"].items(): + averages["services"][service] += amount + averages["total"] += period["total"] + + # Calculate the average by dividing by the number of full months + full_month_count = len(full_months_data) + if full_month_count > 0: + averages["total"] = round(averages["total"] / full_month_count, 2) + for service in averages["services"]: + averages["services"][service] = round(averages["services"][service] / 
full_month_count, 2) + + # Sort services by average cost (descending) + averages["services"] = {k: averages["services"][k] for k in + sorted(averages["services"].keys(), + key=lambda x: averages["services"].get(x, 0), + reverse=True) + if averages["services"][k] > 0} + + # Final structured output + return { + "average": averages, + "time_periods": time_periods + } + +def lambda_handler(event, context): + try: + # Ensure event is a dictionary + if event is None: + event = {} + + # More comprehensive debugging + debug_info = { + "event_received": event is not None, + "event_type": type(event).__name__, + "event_keys": list(event.keys()) if isinstance(event, dict) else "not_a_dict" + } + + # Add current time for reference + debug_info["timestamp"] = time.time() + debug_info["formatted_time"] = time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()) + + # Safely get nested values + def safe_get(obj, *keys): + if not isinstance(obj, dict): + return None + current = obj + for key in keys: + if not isinstance(current, dict) or key not in current: + return None + current = current[key] + return current + + # More safely extract values + params_struct = safe_get(event, 'params') + querystring_struct = safe_get(event, 'params', 'querystring') + query_params = safe_get(event, 'queryStringParameters') + + debug_info.update({ + "params_structure": params_struct, + "querystring_structure": querystring_struct, + "query_string_params": query_params, + "full_context": { + "path": safe_get(event, 'path'), + "method": safe_get(event, 'httpMethod'), + "headers": safe_get(event, 'headers'), + "raw_query_string": safe_get(event, 'rawQueryString'), + "is_base64_encoded": safe_get(event, 'isBase64Encoded') + } + }) + + # Get months parameter from query string (default to 2) + months = 2 + debug_info["default_months"] = months + + # Track parameter access attempts + param_attempts = [] + + # Safely try to get months parameter + try: + # Try to get months from new template structure (Method Request Pass-Through) + param_attempts.append("Checking event['params']['querystring']") + months_param_1 = safe_get(event, 'params', 'querystring', 'months') + + if months_param_1 is not None: + param_attempts.append(f"Found months in params.querystring: {months_param_1}") + try: + months = int(months_param_1) + param_attempts.append(f"Parsed value: {months}") + if months < 1: + months = 2 # Use default if invalid + param_attempts.append(f"Value was < 1, reset to 2") + except (ValueError, TypeError) as e: + param_attempts.append(f"Error parsing: {str(e)}") + # Continue with default value + else: + param_attempts.append("months not found in params.querystring") + + # Fall back to the original query string parameter format + if months == 2: # If not set by previous method + param_attempts.append("Trying fallback: event['queryStringParameters']") + months_param_2 = safe_get(event, 'queryStringParameters', 'months') + + if months_param_2 is not None: + param_attempts.append(f"Found months in queryStringParameters: {months_param_2}") + try: + months = int(months_param_2) + param_attempts.append(f"Parsed value: {months}") + if months < 1: + months = 2 # Use default if invalid + param_attempts.append(f"Value was < 1, reset to 2") + except (ValueError, TypeError) as e: + param_attempts.append(f"Error parsing: {str(e)}") + # Continue with default value + else: + param_attempts.append("months not found in queryStringParameters") + except Exception as e: + param_attempts.append(f"Error during parameter processing: {str(e)}") + # Continue 
with default value + + # Track the final months value used + debug_info["final_months_value"] = months + debug_info["param_attempts"] = param_attempts + + # Calculate time period + time_period = calculate_date_range(months) + debug_info["calculated_time_period"] = time_period + + # Call AWS Cost Explorer API + response = ce_client.get_cost_and_usage( + TimePeriod=time_period, + Granularity='MONTHLY', + Metrics=['UnblendedCost'], + GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}] + ) + + # Process response into a more user-friendly format + processed_data = process_cost_explorer_response(response) + + # Add metadata + processed_data["metadata"] = { + "start_date": time_period["Start"], + "end_date": time_period["End"], + "months_requested": months, + "usage_info": "To request a specific number of months, add '?months=N' to the URL (e.g., '/cost-explorer?months=6' for 6 months of data)", + "debug": debug_info + } + + # Return processed data with proper API Gateway format + return { + 'statusCode': 200, + 'headers': { + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*' + }, + 'body': json.dumps(processed_data) + } + + except Exception as e: + # Return error with proper API Gateway format + return { + 'statusCode': 500, + 'headers': { + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*' + }, + 'body': json.dumps({ + 'error': 'CostExplorerError', + 'message': str(e) + }) + } \ No newline at end of file diff --git a/roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.j2 b/roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.j2 new file mode 100644 index 000000000..770bae971 --- /dev/null +++ b/roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.j2 @@ -0,0 +1,792 @@ +""" +AWS Lambda Function for Infrastructure Data Collection + +This Lambda function collects AWS infrastructure data and saves to S3. +It is a Lambda-compatible refactoring of the aws-cli_data-collection.py script. 
+ +Dependencies: boto3, json, logging + +Environment Variables: + S3_BUCKET_NAME - S3 bucket to store the output JSON + S3_KEY_PREFIX - (Optional) Prefix to use for S3 keys (default: 'infra-data/') + DEFAULT_REGIONS - (Optional) Comma-separated regions to collect data from (default: all regions) + ACCOUNT_ALIAS - (Optional) Name to use for the account in metadata (default: AWS account ID) + +Event Parameters: + specific_region - Single region to process (overrides DEFAULT_REGIONS) + regions - List of regions to process (overrides DEFAULT_REGIONS and specific_region) +""" + +import json +import logging +import os +import sys +from datetime import datetime +from pathlib import Path +import concurrent.futures +from typing import Dict, List, Optional, Any, Tuple, Union + +import boto3 +from botocore.exceptions import ClientError + +# Configure logging for Lambda environment +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +# Constants +ALL_REGIONS = [ + "ap-south-1", "ap-northeast-3", "ap-northeast-2", "ap-southeast-1", + "ap-southeast-2", "ap-northeast-1", "ca-central-1", "eu-central-1", + "eu-west-1", "eu-west-2", "eu-west-3", "eu-north-1", "sa-east-1", + "us-east-1", "us-east-2", "us-west-1", "us-west-2" +] + +# S3 configuration from environment variables +S3_BUCKET_NAME = os.environ.get('S3_BUCKET_NAME') +S3_KEY_PREFIX = os.environ.get('S3_KEY_PREFIX', 'infra-data/') +# Ansible injected region variable for client-specific region (overridden via Ansible templating) +ANSIBLE_REGION = "{{ _aws_region }}" + +def get_account_identifier() -> str: + """Get account ID or alias to use as identifier.""" + # First check if there's an environment variable defined + account_alias = os.environ.get('ACCOUNT_ALIAS') + if account_alias: + return account_alias + + # Otherwise use the AWS account ID from STS + try: + sts_client = boto3.client('sts') + account_id = sts_client.get_caller_identity()['Account'] + return account_id + except Exception as e: + logger.warning(f"Failed to get account ID: {str(e)}") + return "unknown-account" + +def get_regions(event: Dict[str, Any]) -> List[str]: + """Get list of regions to process based on Lambda event or environment variables.""" + # Ansible region override (set via Ansible templating) + if ANSIBLE_REGION: + return [ANSIBLE_REGION] + # Check for regions in event + if 'regions' in event and isinstance(event['regions'], list): + return event['regions'] + + # Check for single region in event + if 'specific_region' in event and event['specific_region']: + return [event['specific_region']] + + # Check for regions in environment variable + env_regions = os.environ.get('DEFAULT_REGIONS') + if env_regions: + return [r.strip() for r in env_regions.split(',')] + + # Default to all regions + return ALL_REGIONS + +def create_session() -> boto3.Session: + """ + Create an AWS session using Lambda execution role credentials. + + In Lambda, this automatically uses the IAM role assigned to the function. 
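    For local testing outside Lambda (an assumption, not something this template does),
    a named profile could be substituted instead, e.g.:

        session = boto3.Session(profile_name="client-account")  # hypothetical profile name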
+ """ + return boto3.Session() + +def collect_ec2(session, region): + """Collect EC2 instance data for a region.""" + try: + ec2_client = session.client('ec2', region_name=region) + response = ec2_client.describe_instances() + + instances = [] + for reservation in response.get('Reservations', []): + for instance in reservation.get('Instances', []): + # Extract instance name from tags + instance_name = "N/A" + if instance.get('Tags'): + for tag in instance.get('Tags', []): + if tag['Key'] == 'Name': + instance_name = tag['Value'] + break + + # Determine pricing model + if instance.get('InstanceLifecycle') is None: + pricing_model = "On-Demand" + elif instance.get('InstanceLifecycle') == "spot": + pricing_model = "Spot Instances" + else: + pricing_model = instance.get('InstanceLifecycle') + + # Get volume information + try: + volumes_response = ec2_client.describe_volumes( + Filters=[ + {'Name': 'attachment.instance-id', 'Values': [instance['InstanceId']]} + ] + ) + + root_volume_size = 0 + storage_type = "N/A" + additional_ebs_volumes = [] + + volumes = volumes_response.get('Volumes', []) + if volumes: + root_volume_size = volumes[0].get('Size', 0) + storage_type = volumes[0].get('VolumeType', 'N/A') + + for volume in volumes[1:]: + additional_ebs_volumes.append({ + 'Type': volume.get('VolumeType', 'N/A'), + 'Size': volume.get('Size', 0) + }) + except Exception as e: + logger.warning(f"Error getting volume info for instance {instance.get('InstanceId')}: {str(e)}") + + instances.append({ + 'InstanceName': instance_name, + 'InstanceID': instance['InstanceId'], + 'Region': region, + 'InstanceType': instance.get('InstanceType', 'N/A'), + 'Status': instance.get('State', {}).get('Name', 'N/A'), + 'StorageType': storage_type, + 'RootVolumeSize': root_volume_size, + 'AdditionalEBSVolumes': additional_ebs_volumes, + 'PricingModel': pricing_model + }) + + return instances + except ClientError as e: + logger.error(f"Error collecting EC2 data for region {region}: {str(e)}") + return [] + +def collect_asg(session, region): + """Collect Auto Scaling Group data for a region.""" + try: + asg_client = session.client('autoscaling', region_name=region) + response = asg_client.describe_auto_scaling_groups() + + asgs = [] + for asg in response.get('AutoScalingGroups', []): + asgs.append({ + 'ASGName': asg.get('AutoScalingGroupName', 'N/A'), + 'Region': region, + 'DesiredCapacity': asg.get('DesiredCapacity', 0), + 'MinCapacity': asg.get('MinSize', 0), + 'MaxCapacity': asg.get('MaxSize', 0) + }) + + return asgs + except ClientError as e: + logger.error(f"Error collecting ASG data for region {region}: {str(e)}") + return [] + +def collect_rds(session, region): + """Collect RDS database data for a region.""" + try: + rds_client = session.client('rds', region_name=region) + response = rds_client.describe_db_instances() + + databases = [] + for db in response.get('DBInstances', []): + databases.append({ + 'DBInstanceIdentifier': db.get('DBInstanceIdentifier', 'N/A'), + 'Region': region, + 'EngineType': db.get('Engine', 'N/A'), + 'DBInstanceClass': db.get('DBInstanceClass', 'N/A'), + 'AllocatedStorage': db.get('AllocatedStorage', 0), + 'StorageType': db.get('StorageType', 'N/A'), + 'MultiAZ': db.get('MultiAZ', False), + 'ReadReplicas': db.get('ReadReplicaDBInstanceIdentifiers', []) + }) + + return databases + except ClientError as e: + logger.error(f"Error collecting RDS data for region {region}: {str(e)}") + return [] + +def collect_s3(session): + """Collect S3 bucket data.""" + try: + s3_client = 
session.client('s3') + response = s3_client.list_buckets() + + buckets = [] + for bucket in response.get('Buckets', []): + bucket_name = bucket.get('Name') + + # Get bucket region + try: + location_response = s3_client.get_bucket_location(Bucket=bucket_name) + region = location_response.get('LocationConstraint') + if region is None: + region = 'us-east-1' + except ClientError as e: + logger.warning(f"Error getting location for bucket {bucket_name}: {str(e)}") + region = 'us-east-1' + + buckets.append({ + 'BucketName': bucket_name, + 'Region': region, + 'StorageClass': 'Standard', + 'NumberOfObjects': 0, + 'TotalStorageUsed_GB': 0 + }) + + return buckets + except ClientError as e: + logger.error(f"Error collecting S3 data: {str(e)}") + return [] + +def collect_nat_gw(session, region): + """Collect NAT Gateway data for a region.""" + try: + ec2_client = session.client('ec2', region_name=region) + response = ec2_client.describe_nat_gateways() + + nat_gateways = [] + for nat in response.get('NatGateways', []): + elastic_ips = [] + for address in nat.get('NatGatewayAddresses', []): + if address.get('PublicIp'): + elastic_ips.append(address.get('PublicIp')) + + nat_gateways.append({ + 'NATGatewayID': nat.get('NatGatewayId', 'N/A'), + 'Region': region, + 'VpcID': nat.get('VpcId', 'N/A'), + 'ElasticIPsAssociated': elastic_ips, + 'DataProcessed_GB': 0, + 'NumberOfNATGateways': 1 + }) + + return nat_gateways + except ClientError as e: + logger.error(f"Error collecting NAT Gateway data for region {region}: {str(e)}") + return [] + +def collect_elb(session, region): + """Collect Elastic Load Balancer data for a region.""" + try: + elbs = [] + + # Classic Load Balancers + try: + elb_client = session.client('elb', region_name=region) + classic_response = elb_client.describe_load_balancers() + + for lb in classic_response.get('LoadBalancerDescriptions', []): + elbs.append({ + 'LoadBalancerName': lb.get('LoadBalancerName', 'N/A'), + 'Region': region, + 'Type': 'Classic', + 'NumberOfLoadBalancers': 1, + 'DataProcessed_GB': 0, + 'ActiveConnections': 0 + }) + except ClientError as e: + logger.warning(f"Error collecting Classic ELB data for region {region}: {str(e)}") + + # Application and Network Load Balancers + try: + elbv2_client = session.client('elbv2', region_name=region) + v2_response = elbv2_client.describe_load_balancers() + + for lb in v2_response.get('LoadBalancers', []): + elbs.append({ + 'LoadBalancerName': lb.get('LoadBalancerName', 'N/A'), + 'Region': region, + 'Type': lb.get('Type', 'N/A'), + 'NumberOfLoadBalancers': 1, + 'DataProcessed_GB': 0, + 'ActiveConnections': 0 + }) + except ClientError as e: + logger.warning(f"Error collecting ALB/NLB data for region {region}: {str(e)}") + + return elbs + except Exception as e: + logger.error(f"Error collecting ELB data for region {region}: {str(e)}") + return [] + +def collect_cloudfront(session, region=None): + """ + Collect CloudFront distribution data. + CloudFront is a global service, so region parameter is ignored but kept for API consistency. 
+ """ + try: + # CloudFront is a global service, always use us-east-1 + cf_client = session.client('cloudfront', region_name='us-east-1') + response = cf_client.list_distributions() + + # Track unique distributions by ID to prevent duplicates + unique_distributions = {} + + if 'DistributionList' in response and 'Items' in response['DistributionList']: + for item in response['DistributionList']['Items']: + dist_id = item.get('Id', 'N/A') + if dist_id not in unique_distributions: + # Extract origin domains + origins = [] + for origin in item.get('Origins', {}).get('Items', []): + origins.append(origin.get('DomainName', 'N/A')) + + unique_distributions[dist_id] = { + 'DistributionID': dist_id, + 'DomainName': item.get('DomainName', 'N/A'), + 'Status': item.get('Status', 'N/A'), + 'Enabled': item.get('Enabled', False), + 'Origin': ', '.join(origins), + 'DataTransferGB': 0 + } + + return list(unique_distributions.values()) + except ClientError as e: + logger.error(f"Error collecting CloudFront data: {str(e)}") + return [] + +def collect_data_transfer(region): + """ + Collect Data Transfer cost data for a region. + + Note: This is a placeholder function. In a Lambda environment, + actual data transfer metrics would likely need to be retrieved + from CloudWatch metrics or Cost Explorer, which is beyond + the scope of this script. + """ + return [{ + 'Region': region, + 'InterRegionDataTransfer_GB': 0, + 'DataTransferToInternet_GB': 0, + 'DataTransferWithinAWSServices_GB': 0 + }] + +def collect_vpc_data_transfer(region): + """ + Collect VPC Data Transfer for a region. + + Note: This is a placeholder function. In a Lambda environment, + actual VPC data transfer metrics would need to be retrieved + from CloudWatch metrics, which is beyond the scope of this script. 
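    One plausible (and deliberately unimplemented) approach, sketched purely as an
    illustration: use CloudWatch byte counters as a stand-in, here the NAT gateway
    metrics. The gateway ID and time window below are hypothetical:

        import datetime
        cloudwatch = boto3.client("cloudwatch", region_name=region)
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(days=1)
        stats = cloudwatch.get_metric_statistics(
            Namespace="AWS/NATGateway",
            MetricName="BytesOutToDestination",
            Dimensions=[{"Name": "NatGatewayId", "Value": "nat-0123456789abcdef0"}],
            StartTime=start,
            EndTime=end,
            Period=86400,
            Statistics=["Sum"],
        )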
+ """ + return [{ + 'Region': region, + 'VPC_DataTransfer_GB': 0 + }] + +def collect_ebs_volumes(session, region): + """Collect EBS volume data for a region.""" + try: + ec2_client = session.client('ec2', region_name=region) + response = ec2_client.describe_volumes() + + volumes = [] + total_size = 0 + unattached_count = 0 + volume_types = {} + + for volume in response.get('Volumes', []): + # Extract volume name from tags + volume_name = None + if volume.get('Tags'): + for tag in volume.get('Tags', []): + if tag['Key'] == 'Name': + volume_name = tag['Value'] + break + + # Count by volume type + volume_type = volume.get('VolumeType', 'unknown') + if volume_type in volume_types: + volume_types[volume_type]['count'] += 1 + volume_types[volume_type]['totalSize'] += volume.get('Size', 0) + else: + volume_types[volume_type] = { + 'count': 1, + 'totalSize': volume.get('Size', 0) + } + + # Check if volume is attached + attachments = [] + for attachment in volume.get('Attachments', []): + attachments.append({ + 'Instance': attachment.get('InstanceId', 'N/A'), + 'Device': attachment.get('Device', 'N/A'), + 'State': attachment.get('State', 'N/A') + }) + + if not attachments: + unattached_count += 1 + + total_size += volume.get('Size', 0) + + volumes.append({ + 'ID': volume.get('VolumeId', 'N/A'), + 'Size': volume.get('Size', 0), + 'Type': volume.get('VolumeType', 'N/A'), + 'State': volume.get('State', 'N/A'), + 'Name': volume_name, + 'Region': region, + 'Attachments': attachments + }) + + return { + 'Volumes': volumes, + 'Summary': { + 'TotalVolumes': len(volumes), + 'TotalSizeGB': total_size, + 'UnattachedVolumes': unattached_count, + 'VolumesByType': volume_types + } + } + except ClientError as e: + logger.error(f"Error collecting EBS volume data for region {region}: {str(e)}") + return { + 'Volumes': [], + 'Summary': { + 'TotalVolumes': 0, + 'TotalSizeGB': 0, + 'UnattachedVolumes': 0, + 'VolumesByType': {} + } + } + +def collect_elastic_ips(session, region): + """Collect Elastic IP data for a region.""" + try: + ec2_client = session.client('ec2', region_name=region) + response = ec2_client.describe_addresses() + + addresses = [] + associated_count = 0 + unassociated_count = 0 + + for address in response.get('Addresses', []): + # Extract EIP name from tags + eip_name = None + if address.get('Tags'): + for tag in address.get('Tags', []): + if tag['Key'] == 'Name': + eip_name = tag['Value'] + break + + # Check if EIP is associated + if address.get('AssociationId'): + associated_count += 1 + else: + unassociated_count += 1 + + addresses.append({ + 'PublicIP': address.get('PublicIp', 'N/A'), + 'Name': eip_name, + 'State': 'Associated' if address.get('AssociationId') else 'Unassociated', + 'Instance': address.get('InstanceId', 'N/A'), + 'Region': region + }) + + return { + 'Addresses': addresses, + 'Summary': { + 'TotalEIPs': len(addresses), + 'UnassociatedEIPs': unassociated_count, + 'AssociatedEIPs': associated_count, + 'ByState': { + 'Associated': associated_count, + 'Unassociated': unassociated_count + } + } + } + except ClientError as e: + logger.error(f"Error collecting Elastic IP data for region {region}: {str(e)}") + return { + 'Addresses': [], + 'Summary': { + 'TotalEIPs': 0, + 'UnassociatedEIPs': 0, + 'AssociatedEIPs': 0, + 'ByState': { + 'Associated': 0, + 'Unassociated': 0 + } + } + } + +def upload_to_s3(data: Dict[str, Any], account_id: str) -> Tuple[bool, str]: + """Upload JSON data to S3 bucket.""" + if not S3_BUCKET_NAME: + logger.error("S3_BUCKET_NAME environment variable not set") + 
return False, "S3_BUCKET_NAME not configured" + + # Create S3 key with timestamp + timestamp = datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S') + key = f"{S3_KEY_PREFIX.rstrip('/')}/{account_id}/{timestamp}_infra.json" + + # Also create a "latest" version at a consistent path for API Gateway access + latest_key = f"{S3_KEY_PREFIX.rstrip('/')}/{account_id}/latest_infra.json" + + try: + s3_client = boto3.client('s3') + + # Upload with timestamp + s3_client.put_object( + Bucket=S3_BUCKET_NAME, + Key=key, + Body=json.dumps(data, indent=2), + ContentType='application/json' + ) + + # Upload "latest" version + s3_client.put_object( + Bucket=S3_BUCKET_NAME, + Key=latest_key, + Body=json.dumps(data, indent=2), + ContentType='application/json' + ) + + logger.info(f"Successfully uploaded data to s3://{S3_BUCKET_NAME}/{key}") + return True, key + except Exception as e: + logger.error(f"Error uploading to S3: {str(e)}") + return False, str(e) + +def validate_json_structure(data: Dict[str, Any]) -> Tuple[bool, str]: + """ + Validate the JSON structure against expected schema. + Returns (is_valid, error_message) + """ + required_top_level_keys = { + 'EC2Instances', 'AutoScalingGroups', 'RDSDatabases', 'S3Storage', + 'NATGateways', 'ElasticLoadBalancers', 'CloudFrontDistributions', + 'VPCs', 'DataTransferCosts', 'StorageVolumes', 'ElasticIPs' + } + + # Check for required top-level keys + missing_keys = required_top_level_keys - set(data.keys()) + if missing_keys: + return False, f"Missing required top-level keys: {missing_keys}" + + # Additional validation can be added here + + return True, "" + +def collect_comprehensive_data(regions: List[str]) -> Dict[str, Any]: + """Collect all AWS data and return as dict.""" + # Create AWS session using Lambda execution role + session = create_session() + + # Initialize empty data structures + all_ec2_data = [] + all_asg_data = [] + all_rds_data = [] + all_nat_gw_data = [] + all_elb_data = [] + all_vpc_data = [] + all_data_transfer = [] + + # Change these from arrays to combined objects to properly aggregate across regions + combined_ebs_volumes = { + 'Volumes': [], + 'Summary': { + 'TotalVolumes': 0, + 'TotalSizeGB': 0, + 'UnattachedVolumes': 0, + 'VolumesByType': {} + } + } + + combined_elastic_ips = { + 'Addresses': [], + 'Summary': { + 'TotalEIPs': 0, + 'UnassociatedEIPs': 0, + 'AssociatedEIPs': 0, + 'ByState': { + 'Associated': 0, + 'Unassociated': 0 + } + } + } + + # Get account identifier (ID or alias) + account_id = get_account_identifier() + + # Collect global services first + logger.info("Collecting global service data...") + + # S3 data (global service) + s3_data = collect_s3(session) + logger.info("S3 data collected.") + + # CloudFront data (global service) + logger.info("Collecting CloudFront data...") + cloudfront_data = collect_cloudfront(session) + logger.info("CloudFront data collected.") + + # Process each region + for region in regions: + logger.info(f"Collecting data for region: {region}") + + # EC2 + logger.info(f" Collecting EC2 data for {region}...") + region_ec2 = collect_ec2(session, region) + all_ec2_data.extend(region_ec2) + + # RDS + logger.info(f" Collecting RDS data for {region}...") + region_rds = collect_rds(session, region) + all_rds_data.extend(region_rds) + + # Auto Scaling Groups + logger.info(f" Collecting ASG data for {region}...") + region_asg = collect_asg(session, region) + all_asg_data.extend(region_asg) + + # NAT Gateways + logger.info(f" Collecting NAT Gateway data for {region}...") + region_nat_gw = collect_nat_gw(session, 
region) + all_nat_gw_data.extend(region_nat_gw) + + # Elastic Load Balancers + logger.info(f" Collecting ELB data for {region}...") + region_elb = collect_elb(session, region) + all_elb_data.extend(region_elb) + + # VPCs + logger.info(f" Collecting VPC data for {region}...") + region_vpc = collect_vpc_data_transfer(region) + all_vpc_data.extend(region_vpc) + + # Data Transfer Costs + logger.info(f" Collecting Data Transfer Costs for {region}...") + region_data_transfer = collect_data_transfer(region) + all_data_transfer.extend(region_data_transfer) + + # EBS Volumes + logger.info(f" Collecting EBS Volumes for {region}...") + region_ebs_volumes = collect_ebs_volumes(session, region) + if region_ebs_volumes: + combined_ebs_volumes['Volumes'].extend(region_ebs_volumes.get('Volumes', [])) + region_summary = region_ebs_volumes.get('Summary', {}) + combined_ebs_volumes['Summary']['TotalVolumes'] += region_summary.get('TotalVolumes', 0) + combined_ebs_volumes['Summary']['TotalSizeGB'] += region_summary.get('TotalSizeGB', 0) + combined_ebs_volumes['Summary']['UnattachedVolumes'] += region_summary.get('UnattachedVolumes', 0) + + # Merge volume types + region_volume_types = region_summary.get('VolumesByType', {}) + for vol_type, info in region_volume_types.items(): + if vol_type in combined_ebs_volumes['Summary']['VolumesByType']: + combined_ebs_volumes['Summary']['VolumesByType'][vol_type]['count'] += info.get('count', 0) + combined_ebs_volumes['Summary']['VolumesByType'][vol_type]['totalSize'] += info.get('totalSize', 0) + else: + combined_ebs_volumes['Summary']['VolumesByType'][vol_type] = info + + # Elastic IPs + logger.info(f" Collecting Elastic IPs for {region}...") + region_elastic_ips = collect_elastic_ips(session, region) + if region_elastic_ips: + combined_elastic_ips['Addresses'].extend(region_elastic_ips.get('Addresses', [])) + region_summary = region_elastic_ips.get('Summary', {}) + combined_elastic_ips['Summary']['TotalEIPs'] += region_summary.get('TotalEIPs', 0) + combined_elastic_ips['Summary']['UnassociatedEIPs'] += region_summary.get('UnassociatedEIPs', 0) + combined_elastic_ips['Summary']['AssociatedEIPs'] += region_summary.get('AssociatedEIPs', 0) + combined_elastic_ips['Summary']['ByState']['Associated'] += region_summary.get('ByState', {}).get('Associated', 0) + combined_elastic_ips['Summary']['ByState']['Unassociated'] += region_summary.get('ByState', {}).get('Unassociated', 0) + + logger.info(f" Region {region} data collection complete") + + # Create final JSON output + logger.info("Creating final JSON output...") + + # Combine all collected data + complete_data = { + 'EC2Instances': all_ec2_data, + 'AutoScalingGroups': all_asg_data, + 'RDSDatabases': all_rds_data, + 'S3Storage': s3_data, + 'NATGateways': all_nat_gw_data, + 'ElasticLoadBalancers': all_elb_data, + 'CloudFrontDistributions': cloudfront_data, + 'VPCs': all_vpc_data, + 'DataTransferCosts': all_data_transfer, + 'StorageVolumes': { + 'Details': combined_ebs_volumes['Volumes'], + 'Summary': combined_ebs_volumes['Summary'] + }, + 'ElasticIPs': { + 'Details': combined_elastic_ips['Addresses'], + 'Summary': combined_elastic_ips['Summary'] + }, + 'MetaData': { + 'AccountID': account_id, + 'RegionsScanned': regions, + 'CollectionTimestamp': datetime.utcnow().isoformat(), + 'LambdaFunction': 'api_lambda_data-collection' + } + } + + return complete_data + +def lambda_handler(event, context): + """ + Lambda function handler for AWS infrastructure data collection. 
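    Illustrative invocation events (region names are examples only). When the templated
    ANSIBLE_REGION is non-empty it takes precedence in get_regions(); otherwise:

        {}                                        # e.g. EventBridge schedule: falls back to
                                                  # DEFAULT_REGIONS or ALL_REGIONS
        {"specific_region": "eu-west-1"}          # scan a single region
        {"regions": ["eu-west-1", "eu-west-2"]}   # scan an explicit list of regions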
+ """ + logger.info("Starting infrastructure data collection Lambda") + logger.info(f"Event: {json.dumps(event)}") + + # Basic validation of S3 configuration + if not S3_BUCKET_NAME: + error_msg = "S3_BUCKET_NAME environment variable not set" + logger.error(error_msg) + return { + 'statusCode': 500, + 'body': json.dumps({'error': error_msg}) + } + + # Get regions to process + regions = get_regions(event) + logger.info(f"Will scan the following regions: {regions}") + + # Get account identifier + account_id = get_account_identifier() + logger.info(f"Collecting data for account: {account_id}") + + # Collect data + infra_data = collect_comprehensive_data(regions) + + # Validate JSON structure + is_valid, error_message = validate_json_structure(infra_data) + if not is_valid: + error_msg = f"Invalid JSON structure: {error_message}" + logger.error(error_msg) + return { + 'statusCode': 500, + 'body': json.dumps({'error': error_msg}) + } + + # Upload to S3 + upload_success, s3_key = upload_to_s3(infra_data, account_id) + if not upload_success: + error_msg = f"Failed to upload data to S3: {s3_key}" + logger.error(error_msg) + return { + 'statusCode': 500, + 'body': json.dumps({'error': error_msg}) + } + + # Return success response + return { + 'statusCode': 200, + 'body': json.dumps({ + 'message': "Successfully collected and stored infrastructure data", + 'account': account_id, + 'regions_scanned': regions, + 's3_bucket': S3_BUCKET_NAME, + 's3_key': s3_key, + 'latest_key': f"{S3_KEY_PREFIX.rstrip('/')}/{account_id}/latest_infra.json" + }) + } + +# For local testing +if __name__ == "__main__": + test_event = { + 'specific_region': 'us-east-1' # Test with a single region for speed + } + print(json.dumps(lambda_handler(test_event, None), indent=2)) From 9c1d7906f7211ee94c89537dc3b743916ed7c08b Mon Sep 17 00:00:00 2001 From: Sunny Date: Fri, 30 May 2025 13:03:21 +0100 Subject: [PATCH 2/2] Updated code to work with new Eventbridge Schedule functionality in aws_admin_tools which Matej added --- roles/aws/aws_admin_tools/defaults/main.yml | 8 +++++--- ..._explorer_data.j2 => api_get_cost_explorer_data.py.j2} | 0 ...data_to_s3.j2 => schedule_save_infra_data_to_s3.py.j2} | 0 3 files changed, 5 insertions(+), 3 deletions(-) rename roles/aws/aws_admin_tools/templates/{api_get_cost_explorer_data.j2 => api_get_cost_explorer_data.py.j2} (100%) rename roles/aws/aws_admin_tools/templates/{schedule_save_infra_data_to_s3.j2 => schedule_save_infra_data_to_s3.py.j2} (100%) diff --git a/roles/aws/aws_admin_tools/defaults/main.yml b/roles/aws/aws_admin_tools/defaults/main.yml index 63f558e2f..8bf10dbbe 100644 --- a/roles/aws/aws_admin_tools/defaults/main.yml +++ b/roles/aws/aws_admin_tools/defaults/main.yml @@ -46,6 +46,7 @@ aws_admin_tools: action: - "wafv2:GetWebACL" - name: "get_cost_explorer_data" + resource: api type: GET policies: [] inline_policies: @@ -55,9 +56,9 @@ aws_admin_tools: - "lambda:InvokeFunction" - "ce:GetCostAndUsage" - name: "save_infra_data_to_s3" - schedule: - cron: "cron(0 6,12 ? * MON-FRI *)" - timeout: 300 + resource: schedule + cron: "cron(0 6,12 ? * MON-FRI *)" + timeout: 300 policies: [] inline_policies: name: "save_infra_data_to_s3" @@ -76,6 +77,7 @@ aws_admin_tools: - "sts:GetCallerIdentity" - "s3:PutObject" - name: "get_infra_data_from_s3" # This is the bit that sets up the API Gateway to pull and share infra data over API. No Lambda function needed here though. How do we do this? 
See docs
+    resource: api
     type: GET
     policies: []
     inline_policies:
diff --git a/roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.j2 b/roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.py.j2
similarity index 100%
rename from roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.j2
rename to roles/aws/aws_admin_tools/templates/api_get_cost_explorer_data.py.j2
diff --git a/roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.j2 b/roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.py.j2
similarity index 100%
rename from roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.j2
rename to roles/aws/aws_admin_tools/templates/schedule_save_infra_data_to_s3.py.j2
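[Editor's note] A hedged usage sketch for the renamed get_cost_explorer_data endpoint follows. The invoke URL, stage and path are hypothetical; only the '?months=N' query parameter and the response keys come from the template itself:

    import json
    import urllib.request

    # Hypothetical invoke URL; the real one is created by the aws_admin_tools API Gateway setup.
    url = "https://abc123.execute-api.eu-west-2.amazonaws.com/prod/get_cost_explorer_data?months=6"

    with urllib.request.urlopen(url) as resp:
        payload = json.loads(resp.read())

    # With a Lambda proxy integration the fields arrive directly; with the Method Request
    # Pass-Through template the Lambda's full return value (including "body") may come back whole.
    data = json.loads(payload["body"]) if "body" in payload else payload

    print(data["average"]["total"])              # average monthly spend over full months only
    print(data["metadata"]["months_requested"])  # echoes the months parameter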