Sign in

test dataframe operations for cur

There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.


  1. 1

    AWS RDS Cost Analysis based on Instance Type

    There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.

    This task involves comparing daily cost of all RDS Instances in an AWS Account, helping organizations choose the most economical options for their database needs while optimizing their AWS budget

    import pandas as pd from datetime import datetime, timedelta, timezone # Assuming df is your DataFrame and last_n_days is defined # Example: last_n_days = 30 if df is not None: #print("DataFrame Columns:", list(df.columns)) # Convert 'lineItem/UsageStartDate' to datetime and ensure it's in UTC df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate'], utc=True) # Filter out negative costs df = df[df['lineItem/UnblendedCost'] >= 0] # Adjust the cutoff date to the start of the day in UTC cutoff_date = (datetime.utcnow() - timedelta(days=last_n_days)).replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) rds_df = df[(df['lineItem/UsageStartDate'] > cutoff_date) & (df['product/ProductName'] == 'Amazon Relational Database Service') & (df['product/instanceType'].notna())] '''# First full day after cutoff first_full_day_after_cutoff = cutoff_date + timedelta(days=1) rds_df = rds_df[rds_df['lineItem/UsageStartDate'] >= first_full_day_after_cutoff]''' if not rds_df.empty: # Group by 'lineItem/UsageStartDate' and 'product/instanceType' for RDS rds_result = rds_df.groupby([rds_df['lineItem/UsageStartDate'].dt.date, 'product/instanceType']).agg( usage_hours=('lineItem/UsageAmount', 'sum'), usage_cost=('lineItem/UnblendedCost', 'sum') ).reset_index() #print("Number of rows in result DataFrame:", len(rds_result)) # Plotting the RDS cost data context.plot.xlabel = 'Date' context.plot.ylabel = 'RDS Usage Cost($)' context.plot.title = f'RDS Cost Usage (Last {last_n_days} Days)' for instance_type in rds_result['product/instanceType'].unique(): instance_data = rds_result[rds_result['product/instanceType'] == instance_type] x = instance_data['lineItem/UsageStartDate'].tolist() y = instance_data['usage_cost'].tolist() #print(f"Instance Type: {instance_type}") #print(f"Sample Dates (x): {x[:5]}") #print(f"Sample Costs (y): {y[:5]}") context.plot.add_trace(name=f"RDS- {instance_type}", xpts=x, ypts=y, tracetype="line") else: print("No data available for RDS in the specified time frame.") else: print("DataFrame is empty. Exiting.") #context.proceed=False
    copied
    1
  2. 2

    Get unique values column wise in a pandas data frame

    There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.


    import pandas as pd from datetime import datetime, timedelta # Define last_n_days parameter #last_n_days = 30 # You can change this value as needed if df is not None: print("Analyzing and visualizing daily AWS costs...") # Filter out negative values filter_condition = (df['lineItem/UnblendedCost'] >= 0) #| (df['lineItem/UnblendedCost'] <= -0.001) df = df[filter_condition] # Convert 'lineItem/UsageStartDate' to datetime df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate']) # Extract the day of the month and create a new column 'day' df['day'] = df['lineItem/UsageStartDate'].dt.date # Get the maximum date in the dataset max_date = df['day'].max() # Calculate the start date based on the max date and last_n_days start_date = max_date - timedelta(days=last_n_days) # Filter the DataFrame to only include dates greater than or equal to the start date filtered_df = df[df['day'] >= start_date] # Group by day and sum 'lineItem/UnblendedCost' for each group daily_costs = filtered_df.groupby('day')['lineItem/UnblendedCost'].sum() # Extract x and y values x = daily_costs.index.tolist() # This gets the dates y = daily_costs.values.tolist() # This gets the corresponding costs #print(df.head()) # Print the first few rows of the original DataFrame #print(daily_costs) # Print the daily costs after grouping and summing #print(y) # Print the y values extracted from daily_costs # Set the properties for your plot context.plot.xlabel = 'Date' context.plot.ylabel = 'Cost ($)' context.plot.title = f'Daily AWS Costs (Last {last_n_days} Days)' context.plot.add_trace(name=f'Daily AWS Costs (Last {last_n_days} Days)', xpts=x, ypts=y, tracetype="lines") else: print("Failed to fetch data. Exiting.") context.proceed=False ''' columns= ['lineItem/ProductCode', 'lineItem/Description', 'lineItem/UsageType', 'lineItem/Operation', 'product/instanceFamily', 'product/instanceType', 'product/instanceTypeFamily'] for column_to_investigate in columns: # Checking if the column exists in the DataFrame if column_to_investigate in df.columns: # Print all unique values in the column unique_values = df[column_to_investigate].unique() print(f"Unique values in '{column_to_investigate}':\n{unique_values}") else: print(f"The column '{column_to_investigate}' does not exist in the DataFrame.") '''
    copied
    2
  3. 3

    rds cost analysis by instance id

    There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.
    import pandas as pd from datetime import datetime, timedelta, timezone # Assuming df is your DataFrame and last_n_days is defined # Example: last_n_days = 30 if df is not None: print("DataFrame Columns:", list(df.columns)) # Convert 'lineItem/UsageStartDate' to datetime df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate']) # Filter out negative costs df = df[df['lineItem/UnblendedCost'] >= 0] # Filter rows for the last N days for RDS cutoff_date = datetime.utcnow().replace(tzinfo=timezone.utc) - timedelta(days=last_n_days) rds_df = df[(df['lineItem/UsageStartDate'] > cutoff_date) & (df['lineItem/ProductCode'] == 'AmazonRDS') & (df['lineItem/ResourceId'].notna())] if not rds_df.empty: # Group by 'lineItem/UsageStartDate' and 'lineItem/ResourceId' rds_result = rds_df.groupby([rds_df['lineItem/UsageStartDate'].dt.date, 'lineItem/ResourceId']).agg( usage_hours=('lineItem/UsageAmount', 'sum'), usage_cost=('lineItem/UnblendedCost', 'sum') ).reset_index() print("Number of rows in result DataFrame:", len(rds_result)) # Plotting the RDS cost data by instance ID context.plot.xlabel = 'Date' context.plot.ylabel = 'RDS Usage Cost($)' context.plot.title = f'RDS Cost by Instance ID (Last {last_n_days} Days)' for instance_id in rds_result['lineItem/ResourceId'].unique(): instance_data = rds_result[rds_result['lineItem/ResourceId'] == instance_id] x = instance_data['lineItem/UsageStartDate'].tolist() y = instance_data['usage_cost'].tolist() context.plot.add_trace(name=f"RDS Instance - {instance_id}", xpts=x, ypts=y, tracetype="line") else: print("No RDS data available for the specified time frame.") else: print("DataFrame is empty. Exiting.")
    copied
    3
  4. 4

    AWS RDS Cost holistic

    There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.


    import pandas as pd import numpy as np from datetime import datetime, timezone # Assuming df is your DataFrame with AWS Cost and Usage Report data if df is not None: # Convert date columns and ensure numeric columns are correct df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate']) numeric_cols = ['lineItem/UsageAmount', 'lineItem/UnblendedCost', 'reservation/EffectiveCost', 'reservation/UnusedAmortizedUpfrontFeeForBillingPeriod', 'reservation/UnusedRecurringFee', 'reservation/AmortizedUpfrontFeeForBillingPeriod'] df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce') df.fillna(0, inplace=True) # Filter for RDS and specific line item types rds_df = df[(df['product/ProductName'] == 'Amazon Relational Database Service') & (df['lineItem/LineItemType'].isin(['DiscountedUsage', 'Usage', 'Fee', 'RIFee']))] # Calculations based on CASE WHEN conditions conditions = [ (rds_df['lineItem/LineItemType'] == 'DiscountedUsage'), (rds_df['lineItem/LineItemType'] == 'Usage'), (rds_df['lineItem/LineItemType'] == 'RIFee') ] choices = [ rds_df['lineItem/UsageAmount'], rds_df['lineItem/UsageAmount'], 0 ] rds_df['sum_lineItem_UsageAmount'] = np.select(conditions, choices, default=0) rds_df['sum_amortized_cost'] = np.select(conditions, [rds_df['reservation/EffectiveCost'], rds_df['reservation/UnusedAmortizedUpfrontFeeForBillingPeriod'] + rds_df['reservation/UnusedRecurringFee'], rds_df['lineItem/UnblendedCost']], default=rds_df['lineItem/UnblendedCost']) rds_df['sum_ri_trueup'] = np.where(rds_df['lineItem/LineItemType'] == 'RIFee', -rds_df['reservation/AmortizedUpfrontFeeForBillingPeriod'], 0) # Grouping and aggregation with 'pricing/term' grouped_result = rds_df.groupby([ 'bill/PayerAccountId', 'lineItem/UsageAccountId', rds_df['lineItem/UsageStartDate'].dt.date, 'product/instanceType', 'lineItem/Operation', 'lineItem/UsageType', 'lineItem/LineItemType', 'pricing/term', 'product/productFamily', rds_df['lineItem/ResourceId'].str.split(':', expand=True)[6], 'product/databaseEngine' ]).agg( sum_lineItem_UnblendedCost=('lineItem/UnblendedCost', 'sum'), sum_lineItem_UsageAmount=('sum_lineItem_UsageAmount', 'sum'), sum_amortized_cost=('sum_amortized_cost', 'sum'), sum_ri_trueup=('sum_ri_trueup', 'sum') ).reset_index() # Visualization with context.plot context.plot.xlabel = 'Date' context.plot.ylabel = 'Cost' context.plot.title = 'AWS RDS Cost Analysis' for _, row in grouped_result.iterrows(): context.plot.add_trace(name=f"RDS - {row['product/instanceType']}", xpts=[row['lineItem/UsageStartDate']], ypts=[row['sum_lineItem_UnblendedCost']], tracetype="line") print("Data analysis and plotting complete.") else: print("DataFrame is empty. Exiting.")
    copied
    4
  5. 5

    rds costs by instance id

    There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.


    import pandas as pd from datetime import datetime, timedelta, timezone # Assuming df is your DataFrame and last_n_days is defined # Example: last_n_days = 30 if df is not None: print("DataFrame Columns:", list(df.columns)) # Convert 'lineItem/UsageStartDate' to datetime df['lineItem/UsageStartDate'] = pd.to_datetime(df['lineItem/UsageStartDate']) # Filter out negative costs df = df[df['lineItem/UnblendedCost'] >= 0] # Filter rows for the last N days for RDS cutoff_date = datetime.utcnow().replace(tzinfo=timezone.utc) - timedelta(days=last_n_days) rds_df = df[(df['lineItem/UsageStartDate'] > cutoff_date) & (df['lineItem/ProductCode'] == 'AmazonRDS') & (df['lineItem/ResourceId'].notna())] if not rds_df.empty: # Group by 'lineItem/UsageAccountId', 'lineItem/ResourceId', and 'lineItem/UsageStartDate' grouped_rds = rds_df.groupby(['lineItem/UsageAccountId', 'lineItem/ResourceId', rds_df['lineItem/UsageStartDate'].dt.date]).agg( total_usage=('lineItem/UsageAmount', 'sum'), total_cost=('lineItem/UnblendedCost', 'sum') ).reset_index() print("Number of rows in result DataFrame:", len(grouped_rds)) # Plotting the RDS cost data for each instance ID context.plot.xlabel = 'Date' context.plot.ylabel = 'RDS Instance Usage Cost($)' context.plot.title = f'RDS Instance Cost Usage (Last {last_n_days} Days)' for instance_id in grouped_rds['lineItem/ResourceId'].unique(): instance_data = grouped_rds[grouped_rds['lineItem/ResourceId'] == instance_id] x = instance_data['lineItem/UsageStartDate'].tolist() y = instance_data['total_cost'].tolist() print(f"Instance ID: {instance_id}") print(f"Dates (x values): {x}") print(f"Costs (y values): {y}") context.plot.add_trace(name=f"RDS Instance - {instance_id}", xpts=x, ypts=y, tracetype="line") else: print("No RDS data available in the specified time frame.") else: print("DataFrame is empty. Exiting.")
    copied
    5