Sign in

AWS RDS Cost holistic

There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.


import pandas as pd import numpy as np from datetime import datetime, timezone # Assuming df is your DataFrame with AWS Cost and Usage Report data if df is not None: # Convert date columns and ensure numeric columns are correct df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate']) numeric_cols = ['lineItem/UsageAmount', 'lineItem/UnblendedCost', 'reservation/EffectiveCost', 'reservation/UnusedAmortizedUpfrontFeeForBillingPeriod', 'reservation/UnusedRecurringFee', 'reservation/AmortizedUpfrontFeeForBillingPeriod'] df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce') df.fillna(0, inplace=True) # Filter for RDS and specific line item types rds_df = df[(df['product/ProductName'] == 'Amazon Relational Database Service') & (df['lineItem/LineItemType'].isin(['DiscountedUsage', 'Usage', 'Fee', 'RIFee']))] # Calculations based on CASE WHEN conditions conditions = [ (rds_df['lineItem/LineItemType'] == 'DiscountedUsage'), (rds_df['lineItem/LineItemType'] == 'Usage'), (rds_df['lineItem/LineItemType'] == 'RIFee') ] choices = [ rds_df['lineItem/UsageAmount'], rds_df['lineItem/UsageAmount'], 0 ] rds_df['sum_lineItem_UsageAmount'] = np.select(conditions, choices, default=0) rds_df['sum_amortized_cost'] = np.select(conditions, [rds_df['reservation/EffectiveCost'], rds_df['reservation/UnusedAmortizedUpfrontFeeForBillingPeriod'] + rds_df['reservation/UnusedRecurringFee'], rds_df['lineItem/UnblendedCost']], default=rds_df['lineItem/UnblendedCost']) rds_df['sum_ri_trueup'] = np.where(rds_df['lineItem/LineItemType'] == 'RIFee', -rds_df['reservation/AmortizedUpfrontFeeForBillingPeriod'], 0) # Grouping and aggregation with 'pricing/term' grouped_result = rds_df.groupby([ 'bill/PayerAccountId', 'lineItem/UsageAccountId', rds_df['lineItem/UsageStartDate'].dt.date, 'product/instanceType', 'lineItem/Operation', 'lineItem/UsageType', 'lineItem/LineItemType', 'pricing/term', 'product/productFamily', rds_df['lineItem/ResourceId'].str.split(':', expand=True)[6], 'product/databaseEngine' ]).agg( sum_lineItem_UnblendedCost=('lineItem/UnblendedCost', 'sum'), sum_lineItem_UsageAmount=('sum_lineItem_UsageAmount', 'sum'), sum_amortized_cost=('sum_amortized_cost', 'sum'), sum_ri_trueup=('sum_ri_trueup', 'sum') ).reset_index() # Visualization with context.plot context.plot.xlabel = 'Date' context.plot.ylabel = 'Cost' context.plot.title = 'AWS RDS Cost Analysis' for _, row in grouped_result.iterrows(): context.plot.add_trace(name=f"RDS - {row['product/instanceType']}", xpts=[row['lineItem/UsageStartDate']], ypts=[row['sum_lineItem_UnblendedCost']], tracetype="line") print("Data analysis and plotting complete.") else: print("DataFrame is empty. Exiting.")
copied