RDS Cost Analysis

  1. Aggregate and Visualize Comprehensive RDS CPU Utilization

    This task aggregates CPU utilization data for running RDS instances across an AWS account, computes each instance's average CPU usage over a specified period, and plots the top and bottom three instances per region to help assess overall resource efficiency.

    import boto3
    from datetime import datetime, timedelta

    region_name = None  # None to run the script for all regions
    last_n_days = 30    # look-back window in days; normally supplied as an input parameter

    # AWS Credentials - replace with your method to retrieve AWS credentials
    creds = _get_creds(cred_label)['creds']  # Placeholder function
    access_key = creds['username']
    secret_key = creds['password']

    def get_aws_regions():
        """Get a list of all AWS regions."""
        ec2 = boto3.client('ec2', aws_access_key_id=access_key,
                           aws_secret_access_key=secret_key, region_name='us-east-1')
        regions = ec2.describe_regions()
        return [region['RegionName'] for region in regions['Regions']]

    def fetch_rds_instances(region):
        """Fetch all RDS instances in a specific region."""
        rds = boto3.client('rds', aws_access_key_id=access_key,
                           aws_secret_access_key=secret_key, region_name=region)
        paginator = rds.get_paginator('describe_db_instances')
        rds_instances = []
        for page in paginator.paginate():
            for instance in page['DBInstances']:
                rds_instances.append({
                    'DBInstanceIdentifier': instance['DBInstanceIdentifier'],
                    'Region': region,
                })
        return rds_instances

    def fetch_cpu_utilization(db_instance_identifier, region, start_time, end_time):
        """Fetch the average CPU utilization for an RDS instance."""
        cloudwatch = boto3.client('cloudwatch', aws_access_key_id=access_key,
                                  aws_secret_access_key=secret_key, region_name=region)
        metrics = cloudwatch.get_metric_statistics(
            Namespace='AWS/RDS',
            MetricName='CPUUtilization',
            Dimensions=[{'Name': 'DBInstanceIdentifier', 'Value': db_instance_identifier}],
            StartTime=start_time,
            EndTime=end_time,
            Period=3600,
            Statistics=['Average']
        )
        data_points = metrics.get('Datapoints', [])
        # Average the hourly datapoints; report 0 when no data was returned
        if data_points:
            return sum(dp['Average'] for dp in data_points) / len(data_points)
        return 0

    def plot_cpu_utilization(region_name=None, last_n_days=7):
        """Plot the top and bottom 3 RDS instances by average CPU utilization."""
        start_time = datetime.utcnow() - timedelta(days=last_n_days)
        end_time = datetime.utcnow()
        regions = [region_name] if region_name else get_aws_regions()
        for region in regions:
            rds_instances = fetch_rds_instances(region)
            avg_utilizations = []
            for instance in rds_instances:
                avg_cpu = fetch_cpu_utilization(instance['DBInstanceIdentifier'],
                                                region, start_time, end_time)
                avg_utilizations.append((instance['DBInstanceIdentifier'], avg_cpu))
            avg_utilizations.sort(key=lambda x: x[1], reverse=True)
            top_instances = avg_utilizations[:3]
            bottom_instances = avg_utilizations[-3:]
            instance_ids = [x[0] for x in top_instances + bottom_instances]
            utilizations = [x[1] for x in top_instances + bottom_instances]
            # Plotting
            context.plot.add_trace(
                name="CPU Utilization",
                xpts=instance_ids,
                ypts=utilizations,
                tracetype='bar'
            )
            context.plot.xlabel = 'Instance ID'
            context.plot.ylabel = 'Average CPU Utilization (%)'
            context.plot.title = f'Top & Bottom 3 RDS Instances by CPU Utilization (Last {last_n_days} Days)'

    # Example usage
    plot_cpu_utilization(region_name, last_n_days)  # For all regions
    # plot_cpu_utilization(region_name='us-east-1', last_n_days=30)  # For a specific region
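
    If an account has many instances, one get_metric_statistics call per instance adds up. As a possible optimization (not part of the task above), CloudWatch's get_metric_data API accepts up to 500 metric queries per call. The sketch below is a hypothetical batch variant of fetch_cpu_utilization; the function name is an assumption, and it reuses the access_key/secret_key defined earlier.

    def fetch_cpu_utilization_batch(db_ids, region, start_time, end_time):
        """Average CPU for up to 500 RDS instances in one get_metric_data call (sketch)."""
        cloudwatch = boto3.client('cloudwatch', aws_access_key_id=access_key,
                                  aws_secret_access_key=secret_key, region_name=region)
        queries = [{
            'Id': f'cpu{i}',  # query IDs must start with a lowercase letter
            'MetricStat': {
                'Metric': {
                    'Namespace': 'AWS/RDS',
                    'MetricName': 'CPUUtilization',
                    'Dimensions': [{'Name': 'DBInstanceIdentifier', 'Value': db_id}],
                },
                'Period': 3600,
                'Stat': 'Average',
            },
        } for i, db_id in enumerate(db_ids)]
        # Very long windows may paginate results via NextToken; not handled in this sketch
        resp = cloudwatch.get_metric_data(MetricDataQueries=queries,
                                          StartTime=start_time, EndTime=end_time)
        values_by_id = {r['Id']: r['Values'] for r in resp['MetricDataResults']}
        averages = {}
        for i, db_id in enumerate(db_ids):
            vals = values_by_id.get(f'cpu{i}', [])
            averages[db_id] = sum(vals) / len(vals) if vals else 0
        return averages
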
  2. Daily AWS RDS Costs using Athena

    This task examines daily AWS RDS costs by using Athena to query Cost and Usage Report (CUR) data, providing a detailed breakdown of spending on database services.

    import boto3
    import time
    from botocore.exceptions import ClientError, BotoCoreError

    creds = _get_creds(cred_label)['creds']
    access_key = creds['username']
    secret_key = creds['password']

    def run_athena_query(query, database, s3_output):
        """Start an Athena query and return its execution ID."""
        athena_client = boto3.client('athena', aws_access_key_id=access_key,
                                     aws_secret_access_key=secret_key, region_name="us-east-1")
        try:
            response = athena_client.start_query_execution(
                QueryString=query,
                QueryExecutionContext={'Database': database},
                ResultConfiguration={'OutputLocation': s3_output}
            )
            query_execution_id = response['QueryExecutionId']
            print(f"Query execution started, ID: {query_execution_id}")
            return query_execution_id
        except (ClientError, BotoCoreError) as e:
            print(f"Failed to execute query: {e}")
            return None

    def check_query_status(athena_client, query_execution_id):
        """Poll until the query reaches a terminal state."""
        while True:
            try:
                response = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
                status = response['QueryExecution']['Status']['State']
                if status == 'SUCCEEDED':
                    print(f"Query {query_execution_id} succeeded.")
                    return True
                elif status in ['FAILED', 'CANCELLED']:
                    print(f"Query {query_execution_id} failed or was cancelled.")
                    return False
                time.sleep(5)
            except (ClientError, BotoCoreError) as e:
                print(f"Error checking query status: {e}")
                return False

    def get_query_results(athena_client, query_execution_id):
        """Fetch the first page of results (header row plus data rows)."""
        try:
            response = athena_client.get_query_results(QueryExecutionId=query_execution_id)
            result_data = response['ResultSet']['Rows']
            header = [col['VarCharValue'] for col in result_data[0]['Data']]
            results = [[col['VarCharValue'] for col in row['Data']] for row in result_data[1:]]
            return header, results
        except (ClientError, BotoCoreError) as e:
            print(f"Error retrieving query results: {e}")
            return None, None

    def visualize_data(dates, costs, last_n_days):
        print("x values (dates):", dates)
        print("y values (costs):", costs)
        context.plot.add_trace(name=f'Amazon RDS Costs (Last {last_n_days} Days)',
                               xpts=dates, ypts=costs, tracetype="lines")
        context.plot.xlabel = 'Date'
        context.plot.ylabel = 'Cost ($)'
        context.plot.title = f'Daily Amazon RDS Costs (Last {last_n_days} Days excluding last 2 days)'

    last_n_days = 7  # look-back window in days; normally passed as an input parameter

    # The last 2 days are excluded because recent CUR data may still be incomplete
    query = f"""
    SELECT
        DATE(line_item_usage_start_date) AS usage_date,
        line_item_product_code AS product_code,
        product_database_engine AS database_engine,
        SUM(line_item_unblended_cost) AS daily_cost
    FROM my_cur_report_athena
    WHERE line_item_product_code = 'AmazonRDS'
      AND line_item_usage_start_date >= DATE_ADD('day', -{last_n_days} - 2, CURRENT_DATE)
      AND line_item_usage_start_date < DATE_ADD('day', -2, CURRENT_DATE)
      AND product_database_engine IS NOT NULL
      AND product_database_engine != ''
    GROUP BY DATE(line_item_usage_start_date), line_item_product_code, product_database_engine
    ORDER BY usage_date, product_code;
    """

    database = 'athenacurcfn_my_c_u_r_report_athena'
    #s3_output = 's3://dagknows-cur-logging-bucket-athena-query-results-188379622596/dev_query_results/'
    bucket_name = 'dagknows-cur-logging-bucket-athena-query-results-188379622596'  # to be dynamically received from upstream task
    prefix_path = 'dev_query_results'
    s3_output = f"s3://{bucket_name}/{prefix_path}/"

    query_execution_id = run_athena_query(query, database, s3_output)
    if query_execution_id:
        athena_client = boto3.client('athena', aws_access_key_id=access_key,
                                     aws_secret_access_key=secret_key, region_name="us-east-1")
        if check_query_status(athena_client, query_execution_id):
            header, results = get_query_results(athena_client, query_execution_id)
            if results:
                # Results contain one row per (date, engine) combination
                dates = [row[0] for row in results]
                costs = [float(row[3]) for row in results]
                visualize_data(dates, costs, last_n_days)
            else:
                print("No results to show.")
        else:
            print("Query did not succeed. No results to show.")
    else:
        print("Query execution failed. Exiting.")
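
    Note that get_query_results returns at most 1,000 rows per call, so a long look-back window or many database engines could silently truncate the results above. Below is a minimal sketch of a paginated variant using boto3's built-in Athena paginator; get_all_query_results is a hypothetical helper name, not part of the task.

    def get_all_query_results(athena_client, query_execution_id):
        """Collect every result row, following Athena's result pagination (sketch)."""
        paginator = athena_client.get_paginator('get_query_results')
        header, results = None, []
        for page in paginator.paginate(QueryExecutionId=query_execution_id):
            rows = page['ResultSet']['Rows']
            if header is None:
                # The first row of the first page is the column header
                header = [col.get('VarCharValue', '') for col in rows[0]['Data']]
                rows = rows[1:]
            results.extend([col.get('VarCharValue', '') for col in row['Data']]
                           for row in rows)
        return header, results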