Sign in
agent:

Deleting AWS ECS (Elastic Container Service) clusters with low CPU utilization

There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.

This runbook is designed to efficiently manage AWS Elastic Container Service (ECS) resources. It scans specified AWS regions to identify ECS clusters with low average CPU utilization, based on a user-defined threshold. Once these underutilized clusters are identified, the runbook proceeds to delete them, thereby optimizing resource usage and potentially reducing operational costs.

cpu_threshold=20 # CPU utilization threshold in percent, passed as `threshold` to the filter task; hardcoded for a one-time result
copied
  1. 1

    Filter out AWS ECS Clusters having low CPU Utilization

    There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.

    This task scans multiple AWS regions to identify ECS clusters that are underutilized in terms of CPU, based on a set threshold. This enables organizations to easily spot clusters that are consuming resources without delivering optimal performance, thereby helping in decision-making processes related to scaling, resource allocation, or decommissioning. This task aims to improve efficiency and reduce costs by flagging these low-activity clusters for further action.

    import datetime

    import boto3
    from botocore.exceptions import ClientError

    # `_get_creds` and `cred_label` are not defined in this script; presumably
    # they are supplied by the runbook runtime -- confirm against the platform.
    creds = _get_creds(cred_label)['creds']
    access_key = creds['username']
    secret_key = creds['password']


    def _list_all(client, operation, result_key, **kwargs):
        """Return every item of a paginated ECS ``list_*`` call.

        A single ``list_clusters`` / ``list_services`` / ``list_tasks`` call
        returns only one page of results; the paginator follows ``nextToken``
        so that no cluster, service or task is silently left out.
        """
        items = []
        for page in client.get_paginator(operation).paginate(**kwargs):
            items.extend(page[result_key])
        return items


    def _average_cpu(cloudwatch, dimensions, start_time, end_time):
        """Return the mean of the 5-minute 'Average' CPUUtilization datapoints.

        Returns None when CloudWatch has no datapoints for the given
        dimensions, so callers can tell "no data" apart from "0% CPU".
        """
        response = cloudwatch.get_metric_statistics(
            Namespace='AWS/ECS',
            MetricName='CPUUtilization',
            Dimensions=dimensions,
            StartTime=start_time,
            EndTime=end_time,
            Period=300,
            Statistics=['Average'],
        )
        datapoints = response['Datapoints']
        if not datapoints:
            return None
        return sum(point['Average'] for point in datapoints) / len(datapoints)


    def get_ecs_clusters_with_low_cpu_utilization(regions, threshold):
        """
        Identifies ECS services and tasks with low average CPU utilization
        across multiple AWS regions.

        The average is taken over the last 60 minutes of CloudWatch
        'CPUUtilization' datapoints. Entities for which CloudWatch returns no
        datapoints are skipped: missing data is not evidence of low usage, and
        the result of this function feeds a destructive delete step.

        Args:
            regions (List[str]): List of AWS regions to check.
            threshold (int): CPU utilization percentage below which a service
                or task is considered underutilized.

        Returns:
            List[dict]: One dictionary per flagged entity with the keys
            'Region', 'ClusterName', 'ServiceName' or 'TaskName',
            'AverageCPU' and 'Type' ('Service' or 'Task').
        """
        low_cpu_clusters = []

        # One timezone-aware window for the whole scan, so every entity is
        # measured over the same 60 minutes.
        end_time = datetime.datetime.now(datetime.timezone.utc)
        start_time = end_time - datetime.timedelta(hours=1)

        for region in regions:
            try:
                ecs = boto3.client('ecs', aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)
                cloudwatch = boto3.client('cloudwatch', aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)

                for cluster_arn in _list_all(ecs, 'list_clusters', 'clusterArns'):
                    cluster_name = cluster_arn.split('/')[-1]  # Name is the last ARN segment

                    # Services of the cluster
                    for service_arn in _list_all(ecs, 'list_services', 'serviceArns', cluster=cluster_name):
                        service_name = service_arn.split('/')[-1]
                        avg_cpu_service = _average_cpu(
                            cloudwatch,
                            [
                                {'Name': 'ClusterName', 'Value': cluster_name},
                                {'Name': 'ServiceName', 'Value': service_name},
                            ],
                            start_time,
                            end_time,
                        )
                        if avg_cpu_service is not None and avg_cpu_service < threshold:
                            low_cpu_clusters.append({
                                'Region': region,
                                'ClusterName': cluster_name,
                                'ServiceName': service_name,
                                'AverageCPU': avg_cpu_service,
                                'Type': 'Service'
                            })

                    # Tasks of the cluster
                    # NOTE(review): the AWS/ECS namespace appears to document only
                    # ClusterName / ServiceName dimensions, so this TaskId query is
                    # expected to return no datapoints -- confirm, and consider
                    # Container Insights if per-task data is required.
                    for task_arn in _list_all(ecs, 'list_tasks', 'taskArns', cluster=cluster_name):
                        task_name = task_arn.split('/')[-1]
                        avg_cpu_task = _average_cpu(
                            cloudwatch,
                            [
                                {'Name': 'ClusterName', 'Value': cluster_name},
                                {'Name': 'TaskId', 'Value': task_name},
                            ],
                            start_time,
                            end_time,
                        )
                        if avg_cpu_task is not None and avg_cpu_task < threshold:
                            low_cpu_clusters.append({
                                'Region': region,
                                'ClusterName': cluster_name,
                                'TaskName': task_name,
                                'AverageCPU': avg_cpu_task,
                                'Type': 'Task'
                            })

            except ClientError as ce:
                print(f"A botocore exception occurred in region {region}: {ce.response['Error']['Message']}")
            except Exception as e:
                # Best-effort scan: report the failure and continue with the next region.
                print(f"An unknown error occurred in region {region}: {e}")

        return low_cpu_clusters


    def display_low_cpu_clusters(data):
        """Render the flagged services/tasks as a five-column table via `context`.

        Args:
            data (List[dict]): Entries as returned by
                get_ecs_clusters_with_low_cpu_utilization.
        """
        # `context` is not defined in this script; presumably provided by the
        # runbook runtime -- confirm against the platform.
        table = context.newtable()
        table.title = "Low CPU Usage Clusters Overview"
        table.num_cols = 5
        table.num_rows = 1  # Row 0 holds the headers
        table.has_header_row = True

        headers = ["Region", "Cluster Name", "Task/Service Name", "Average CPU (%)", "Type"]
        for col_num, header in enumerate(headers):
            table.setval(0, col_num, header)

        for row_num, cluster_info in enumerate(data, start=1):
            table.num_rows += 1  # Grow the table by one row per entry
            values = [
                cluster_info["Region"],
                cluster_info["ClusterName"],
                # An entry carries either a TaskName or a ServiceName
                cluster_info.get("TaskName", cluster_info.get("ServiceName", "N/A")),
                f"{cluster_info['AverageCPU']:.2f}",
                cluster_info["Type"]
            ]
            for col_num, value in enumerate(values):
                table.setval(row_num, col_num, value)


    # `regions` and `cpu_threshold` are inputs that this script does not define.
    low_cpu_clusters_list = get_ecs_clusters_with_low_cpu_utilization(regions, threshold=int(cpu_threshold))

    if low_cpu_clusters_list:
        display_low_cpu_clusters(low_cpu_clusters_list)
    else:
        print(f"Found {len(low_cpu_clusters_list)} ECS clusters with low CPU Utilization")
        # Nothing was flagged; presumably this tells the runtime to skip the
        # dependent delete sub-task -- confirm against the platform.
        context.skip_sub_tasks = True
    copied
    1
    1. 1.1

      Delete AWS ECS Clusters with low CPU Utilization

      There was a problem that the LLM was not able to address. Please rephrase your prompt and try again.

      This task removes specified ECS clusters, thereby helping organizations maintain a clean and efficient environment. This task is particularly useful for decommissioning clusters that are no longer needed, or that have been identified as underutilized, thereby contributing to cost savings and resource optimization. It ensures that all associated services and tasks within the clusters are properly terminated before removing the clusters themselves.

      import boto3
      from botocore.exceptions import ClientError

      # `_get_creds` and `cred_label` are not defined in this script; presumably
      # they are supplied by the runbook runtime -- confirm against the platform.
      creds = _get_creds(cred_label)['creds']
      access_key = creds['username']
      secret_key = creds['password']


      def _remove_service(ecs, cluster_name, service_name):
          """Scale a service to zero, delete it and wait until it is INACTIVE."""
          ecs.update_service(
              cluster=cluster_name,
              service=service_name,
              desiredCount=0  # Set desired task count to 0 before deleting
          )
          ecs.delete_service(
              cluster=cluster_name,
              service=service_name
          )
          # delete_service only starts draining the service; wait for it to
          # finish before the cluster deletion is attempted.
          ecs.get_waiter('services_inactive').wait(cluster=cluster_name, services=[service_name])
          print(f"Deleted service {service_name} in cluster {cluster_name}")


      def _stop_task(ecs, cluster_name, task_name):
          """Stop a task and wait until it has reached the STOPPED state."""
          ecs.stop_task(
              cluster=cluster_name,
              task=task_name
          )
          # stop_task is asynchronous; wait for the task to actually stop.
          ecs.get_waiter('tasks_stopped').wait(cluster=cluster_name, tasks=[task_name])
          print(f"Stopped task {task_name} in cluster {cluster_name}")


      def delete_low_cpu_clusters(low_cpu_clusters):
          """
          Deletes ECS clusters, their flagged services, and flagged tasks based
          on low CPU utilization.

          Entries are grouped by (region, cluster). For each cluster the flagged
          services are removed first (so they stop replacing tasks), then the
          flagged tasks are stopped, and only then is the cluster deleted, once
          per cluster. If any step fails for a cluster, the error is printed, the
          remaining steps for that cluster are skipped, and processing continues
          with the next cluster.

          Args:
              low_cpu_clusters (list): List of dictionaries containing cluster
                  and service/task details ('Region', 'ClusterName', 'Type' and
                  'ServiceName' or 'TaskName').

          Returns:
              None
          """
          # Group entries per cluster so that delete_cluster runs once per
          # cluster, not once per flagged service/task.
          entries_by_cluster = {}
          for entry in low_cpu_clusters:
              try:
                  key = (entry['Region'], entry['ClusterName'])
              except (KeyError, TypeError) as e:
                  print(f"An unknown error occurred: {e}")
                  continue
              entries_by_cluster.setdefault(key, []).append(entry)

          deleted_clusters = False  # Flag to track if any clusters get deleted

          for (region, cluster_name), entries in entries_by_cluster.items():
              try:
                  ecs = boto3.client('ecs', aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)

                  for entry in entries:
                      if entry['Type'] == 'Service':
                          _remove_service(ecs, cluster_name, entry.get('ServiceName', None))

                  for entry in entries:
                      if entry['Type'] == 'Task':
                          _stop_task(ecs, cluster_name, entry.get('TaskName', None))

                  # ECS rejects this call while the cluster still contains
                  # services or tasks that were not flagged; that error is
                  # reported below and the cluster is left in place.
                  ecs.delete_cluster(cluster=cluster_name)
                  print(f"Deleted cluster {cluster_name}")
                  deleted_clusters = True

              except ClientError as e:
                  print(f"A botocore exception occurred: {e.response['Error']['Message']}")
              except Exception as e:
                  # Includes waiter timeouts; report and move on to the next cluster.
                  print(f"An unknown error occurred: {e}")

          if not deleted_clusters:
              print("No ECS clusters with low CPU utilization were deleted")


      # `low_cpu_clusters_list` is not defined in this script; presumably it is
      # the output of the preceding filter task -- confirm against the platform.
      delete_low_cpu_clusters(low_cpu_clusters=low_cpu_clusters_list)
      copied
      1.1