Get started with Batch and Fargate using the CLI - AWS SDK Code Examples

There are more AWS SDK examples available in the AWS Doc SDK Examples GitHub repo.

Get started with Batch and Fargate using the CLI

The following code example shows how to:

  • Create an IAM execution role for ECS tasks

  • Create a managed Fargate compute environment

  • Create a job queue with priority settings

  • Register a job definition for containerized workloads

  • Submit and monitor a batch job execution

  • View job output in CloudWatch Logs

  • Clean up resources in proper dependency order

Bash
AWS CLI with Bash script
Note

There's more on GitHub. Find the complete example and learn how to set up and run in the Sample developer tutorials repository.

#!/bin/bash # AWS Batch Fargate Getting Started Script - Fixed Version # This script demonstrates creating AWS Batch resources with Fargate orchestration # # HIGH SEVERITY FIXES APPLIED: # 1. Added IAM role propagation delay after role creation # 2. Added resource state validation before deletion attempts set -e # Exit on any error # Configuration SCRIPT_NAME="batch-fargate-tutorial" LOG_FILE="${SCRIPT_NAME}-$(date +%Y%m%d-%H%M%S).log" RANDOM_SUFFIX=$(openssl rand -hex 6) COMPUTE_ENV_NAME="batch-fargate-compute-${RANDOM_SUFFIX}" JOB_QUEUE_NAME="batch-fargate-queue-${RANDOM_SUFFIX}" JOB_DEF_NAME="batch-fargate-jobdef-${RANDOM_SUFFIX}" JOB_NAME="batch-hello-world-${RANDOM_SUFFIX}" ROLE_NAME="BatchEcsTaskExecutionRole-${RANDOM_SUFFIX}" TRUST_POLICY_FILE="batch-trust-policy-${RANDOM_SUFFIX}.json" # Array to track created resources for cleanup CREATED_RESOURCES=() # Logging function log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE" } # Error handling function handle_error() { log "ERROR: Script failed at line $1" log "Attempting to clean up resources created so far..." cleanup_resources exit 1 } # Set up error handling trap 'handle_error $LINENO' ERR # Function to wait for resource to be ready wait_for_compute_env() { local env_name=$1 log "Waiting for compute environment $env_name to be VALID..." while true; do local status=$(aws batch describe-compute-environments \ --compute-environments "$env_name" \ --query 'computeEnvironments[0].status' \ --output text 2>/dev/null || echo "NOT_FOUND") if [ "$status" = "VALID" ]; then log "Compute environment $env_name is ready" break elif [ "$status" = "INVALID" ] || [ "$status" = "NOT_FOUND" ]; then log "ERROR: Compute environment $env_name failed to create properly" return 1 fi log "Compute environment status: $status. Waiting 10 seconds..." sleep 10 done } # Function to wait for job queue to be ready wait_for_job_queue() { local queue_name=$1 log "Waiting for job queue $queue_name to be VALID..." while true; do local state=$(aws batch describe-job-queues \ --job-queues "$queue_name" \ --query 'jobQueues[0].state' \ --output text 2>/dev/null || echo "NOT_FOUND") if [ "$state" = "ENABLED" ]; then log "Job queue $queue_name is ready" break elif [ "$state" = "DISABLED" ] || [ "$state" = "NOT_FOUND" ]; then log "ERROR: Job queue $queue_name failed to create properly" return 1 fi log "Job queue state: $state. Waiting 10 seconds..." sleep 10 done } # Function to wait for job completion wait_for_job() { local job_id=$1 log "Waiting for job $job_id to complete..." while true; do local status=$(aws batch describe-jobs \ --jobs "$job_id" \ --query 'jobs[0].status' \ --output text 2>/dev/null || echo "NOT_FOUND") if [ "$status" = "SUCCEEDED" ]; then log "Job $job_id completed successfully" break elif [ "$status" = "FAILED" ]; then log "ERROR: Job $job_id failed" return 1 fi log "Job status: $status. Waiting 30 seconds..." sleep 30 done } # FIXED: Added function to wait for resource state before deletion wait_for_resource_state() { local resource_type=$1 local resource_name=$2 local expected_state=$3 local max_attempts=30 local attempt=0 log "Waiting for $resource_type $resource_name to reach state: $expected_state" while [ $attempt -lt $max_attempts ]; do local current_state="" case $resource_type in "JOB_QUEUE") current_state=$(aws batch describe-job-queues \ --job-queues "$resource_name" \ --query 'jobQueues[0].state' \ --output text 2>/dev/null || echo "NOT_FOUND") ;; "COMPUTE_ENV") current_state=$(aws batch describe-compute-environments \ --compute-environments "$resource_name" \ --query 'computeEnvironments[0].status' \ --output text 2>/dev/null || echo "NOT_FOUND") ;; esac if [ "$current_state" = "$expected_state" ]; then log "$resource_type $resource_name is now in state: $expected_state" return 0 fi log "$resource_type $resource_name state: $current_state (waiting for $expected_state)" sleep 10 ((attempt++)) done log "WARNING: $resource_type $resource_name did not reach expected state after $max_attempts attempts" return 1 } # Cleanup function cleanup_resources() { log "Starting cleanup of created resources..." # Clean up in reverse order of creation for ((i=${#CREATED_RESOURCES[@]}-1; i>=0; i--)); do local resource="${CREATED_RESOURCES[i]}" local resource_type=$(echo "$resource" | cut -d: -f1) local resource_name=$(echo "$resource" | cut -d: -f2) log "Cleaning up $resource_type: $resource_name" case $resource_type in "JOB_QUEUE") # FIXED: Validate state before deletion aws batch update-job-queue --job-queue "$resource_name" --state DISABLED 2>/dev/null || true wait_for_resource_state "JOB_QUEUE" "$resource_name" "DISABLED" || true aws batch delete-job-queue --job-queue "$resource_name" 2>/dev/null || true ;; "COMPUTE_ENV") # FIXED: Validate state before deletion aws batch update-compute-environment --compute-environment "$resource_name" --state DISABLED 2>/dev/null || true wait_for_resource_state "COMPUTE_ENV" "$resource_name" "DISABLED" || true aws batch delete-compute-environment --compute-environment "$resource_name" 2>/dev/null || true ;; "IAM_ROLE") aws iam detach-role-policy --role-name "$resource_name" --policy-arn "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 2>/dev/null || true aws iam delete-role --role-name "$resource_name" 2>/dev/null || true ;; "FILE") rm -f "$resource_name" 2>/dev/null || true ;; esac done log "Cleanup completed" } # Main script execution main() { log "Starting AWS Batch Fargate tutorial script - Fixed Version" log "Log file: $LOG_FILE" # Get AWS account ID log "Getting AWS account ID..." ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) log "Account ID: $ACCOUNT_ID" # Get default VPC and subnets log "Getting default VPC and subnets..." DEFAULT_VPC=$(aws ec2 describe-vpcs \ --filters "Name=is-default,Values=true" \ --query 'Vpcs[0].VpcId' \ --output text) if [ "$DEFAULT_VPC" = "None" ] || [ "$DEFAULT_VPC" = "null" ]; then log "ERROR: No default VPC found. Please create a VPC first." exit 1 fi log "Default VPC: $DEFAULT_VPC" # Get subnets in the default VPC SUBNETS=$(aws ec2 describe-subnets \ --filters "Name=vpc-id,Values=$DEFAULT_VPC" \ --query 'Subnets[*].SubnetId' \ --output text) if [ -z "$SUBNETS" ]; then log "ERROR: No subnets found in default VPC" exit 1 fi # Convert tab/space-separated subnets to JSON array format SUBNET_ARRAY=$(echo "$SUBNETS" | tr '\t ' '\n' | sed 's/^/"/;s/$/"/' | paste -sd ',' -) log "Subnets: $SUBNETS" log "Subnet array: [$SUBNET_ARRAY]" # Get default security group for the VPC DEFAULT_SG=$(aws ec2 describe-security-groups \ --filters "Name=vpc-id,Values=$DEFAULT_VPC" "Name=group-name,Values=default" \ --query 'SecurityGroups[0].GroupId' \ --output text) if [ "$DEFAULT_SG" = "None" ] || [ "$DEFAULT_SG" = "null" ]; then log "ERROR: No default security group found in VPC" exit 1 fi log "Default security group: $DEFAULT_SG" # Step 1: Create IAM execution role log "Step 1: Creating IAM execution role..." # Create trust policy document cat > "$TRUST_POLICY_FILE" << EOF { "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "Service": "ecs-tasks.amazonaws.com" }, "Action": "sts:AssumeRole" } ] } EOF CREATED_RESOURCES+=("FILE:$TRUST_POLICY_FILE") # Create the role aws iam create-role \ --role-name "$ROLE_NAME" \ --assume-role-policy-document "file://$TRUST_POLICY_FILE" CREATED_RESOURCES+=("IAM_ROLE:$ROLE_NAME") # Attach policy aws iam attach-role-policy \ --role-name "$ROLE_NAME" \ --policy-arn "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" log "IAM role created: $ROLE_NAME" # FIXED: Wait for IAM role propagation log "Waiting for IAM role propagation (15 seconds)..." sleep 15 # Step 2: Create compute environment log "Step 2: Creating Fargate compute environment..." aws batch create-compute-environment \ --compute-environment-name "$COMPUTE_ENV_NAME" \ --type MANAGED \ --state ENABLED \ --compute-resources "{ \"type\": \"FARGATE\", \"maxvCpus\": 256, \"subnets\": [$SUBNET_ARRAY], \"securityGroupIds\": [\"$DEFAULT_SG\"] }" CREATED_RESOURCES+=("COMPUTE_ENV:$COMPUTE_ENV_NAME") # Wait for compute environment to be ready wait_for_compute_env "$COMPUTE_ENV_NAME" # Step 3: Create job queue log "Step 3: Creating job queue..." aws batch create-job-queue \ --job-queue-name "$JOB_QUEUE_NAME" \ --state ENABLED \ --priority 900 \ --compute-environment-order order=1,computeEnvironment="$COMPUTE_ENV_NAME" CREATED_RESOURCES+=("JOB_QUEUE:$JOB_QUEUE_NAME") # Wait for job queue to be ready wait_for_job_queue "$JOB_QUEUE_NAME" # Step 4: Create job definition log "Step 4: Creating job definition..." aws batch register-job-definition \ --job-definition-name "$JOB_DEF_NAME" \ --type container \ --platform-capabilities FARGATE \ --container-properties "{ \"image\": \"busybox\", \"resourceRequirements\": [ {\"type\": \"VCPU\", \"value\": \"0.25\"}, {\"type\": \"MEMORY\", \"value\": \"512\"} ], \"command\": [\"echo\", \"hello world\"], \"networkConfiguration\": { \"assignPublicIp\": \"ENABLED\" }, \"executionRoleArn\": \"arn:aws:iam::${ACCOUNT_ID}:role/${ROLE_NAME}\" }" log "Job definition created: $JOB_DEF_NAME" # Step 5: Submit job log "Step 5: Submitting job..." JOB_ID=$(aws batch submit-job \ --job-name "$JOB_NAME" \ --job-queue "$JOB_QUEUE_NAME" \ --job-definition "$JOB_DEF_NAME" \ --query 'jobId' \ --output text) log "Job submitted with ID: $JOB_ID" # Step 6: Wait for job completion and view output log "Step 6: Waiting for job completion..." wait_for_job "$JOB_ID" # Get log stream name log "Getting job logs..." LOG_STREAM=$(aws batch describe-jobs \ --jobs "$JOB_ID" \ --query 'jobs[0].attempts[0].taskProperties.containers[0].logStreamName' \ --output text) if [ "$LOG_STREAM" != "None" ] && [ "$LOG_STREAM" != "null" ]; then log "Log stream: $LOG_STREAM" log "Job output:" aws logs get-log-events \ --log-group-name "/aws/batch/job" \ --log-stream-name "$LOG_STREAM" \ --query 'events[*].message' \ --output text | tee -a "$LOG_FILE" else log "No log stream available for job" fi log "Tutorial completed successfully!" # Show created resources echo "" echo "===========================================" echo "CREATED RESOURCES" echo "===========================================" echo "The following resources were created:" for resource in "${CREATED_RESOURCES[@]}"; do echo " - $resource" done echo "" echo "===========================================" echo "CLEANUP CONFIRMATION" echo "===========================================" echo "Do you want to clean up all created resources? (y/n): " read -r CLEANUP_CHOICE if [[ "$CLEANUP_CHOICE" =~ ^[Yy]$ ]]; then cleanup_resources log "All resources have been cleaned up" else log "Resources left intact. You can clean them up manually later." echo "To clean up manually, run the following commands:" echo "aws batch update-job-queue --job-queue $JOB_QUEUE_NAME --state DISABLED" echo "aws batch delete-job-queue --job-queue $JOB_QUEUE_NAME" echo "aws batch update-compute-environment --compute-environment $COMPUTE_ENV_NAME --state DISABLED" echo "aws batch delete-compute-environment --compute-environment $COMPUTE_ENV_NAME" echo "aws iam detach-role-policy --role-name $ROLE_NAME --policy-arn arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" echo "aws iam delete-role --role-name $ROLE_NAME" fi } # Run main function main "$@"