CQ4CIPSov8TtTznjVvJ1AWS RDS Cost holistic
AWS RDS Cost holistic
There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.
inputs
outputs
import pandas as pd
import numpy as np
from datetime import datetime, timezone
# Assuming df is your DataFrame with AWS Cost and Usage Report data
if df is not None:
# Convert date columns and ensure numeric columns are correct
df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate'])
numeric_cols = ['lineItem/UsageAmount', 'lineItem/UnblendedCost', 'reservation/EffectiveCost',
'reservation/UnusedAmortizedUpfrontFeeForBillingPeriod', 'reservation/UnusedRecurringFee',
'reservation/AmortizedUpfrontFeeForBillingPeriod']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
df.fillna(0, inplace=True)
# Filter for RDS and specific line item types
rds_df = df[(df['product/ProductName'] == 'Amazon Relational Database Service') &
(df['lineItem/LineItemType'].isin(['DiscountedUsage', 'Usage', 'Fee', 'RIFee']))]
# Calculations based on CASE WHEN conditions
conditions = [
(rds_df['lineItem/LineItemType'] == 'DiscountedUsage'),
(rds_df['lineItem/LineItemType'] == 'Usage'),
(rds_df['lineItem/LineItemType'] == 'RIFee')
]
choices = [
rds_df['lineItem/UsageAmount'],
rds_df['lineItem/UsageAmount'],
0
]
rds_df['sum_lineItem_UsageAmount'] = np.select(conditions, choices, default=0)
rds_df['sum_amortized_cost'] = np.select(conditions,
[rds_df['reservation/EffectiveCost'],
rds_df['reservation/UnusedAmortizedUpfrontFeeForBillingPeriod'] + rds_df['reservation/UnusedRecurringFee'],
rds_df['lineItem/UnblendedCost']],
default=rds_df['lineItem/UnblendedCost'])
rds_df['sum_ri_trueup'] = np.where(rds_df['lineItem/LineItemType'] == 'RIFee',
-rds_df['reservation/AmortizedUpfrontFeeForBillingPeriod'], 0)
# Grouping and aggregation with 'pricing/term'
grouped_result = rds_df.groupby([
'bill/PayerAccountId', 'lineItem/UsageAccountId',
rds_df['lineItem/UsageStartDate'].dt.date, 'product/instanceType',
'lineItem/Operation', 'lineItem/UsageType', 'lineItem/LineItemType',
'pricing/term', 'product/productFamily', rds_df['lineItem/ResourceId'].str.split(':', expand=True)[6],
'product/databaseEngine'
]).agg(
sum_lineItem_UnblendedCost=('lineItem/UnblendedCost', 'sum'),
sum_lineItem_UsageAmount=('sum_lineItem_UsageAmount', 'sum'),
sum_amortized_cost=('sum_amortized_cost', 'sum'),
sum_ri_trueup=('sum_ri_trueup', 'sum')
).reset_index()
# Visualization with context.plot
context.plot.xlabel = 'Date'
context.plot.ylabel = 'Cost'
context.plot.title = 'AWS RDS Cost Analysis'
for _, row in grouped_result.iterrows():
context.plot.add_trace(name=f"RDS - {row['product/instanceType']}", xpts=[row['lineItem/UsageStartDate']],
ypts=[row['sum_lineItem_UnblendedCost']], tracetype="line")
print("Data analysis and plotting complete.")
else:
print("DataFrame is empty. Exiting.")
copied