There are more AWS SDK examples available in the AWS Doc SDK Examples GitHub repository.
Get started with Batch and Fargate using the CLI
The following code example shows how to:
- Create an IAM execution role for ECS tasks
- Create a managed Fargate compute environment
- Create a job queue with priority settings
- Register a job definition for containerized workloads
- Submit and monitor a batch job execution
- View job output in CloudWatch Logs
- Clean up resources in proper dependency order
- Bash
-
- AWS CLI with Bash script
-
Note
There's more on GitHub. Find the complete example and learn how to set it up and run it in the Sample developer tutorials
repository.
#!/bin/bash
#
# AWS Batch Fargate Getting Started Script
#
# Demonstrates creating AWS Batch resources with Fargate orchestration:
#   1. IAM execution role for ECS tasks
#   2. Managed Fargate compute environment
#   3. Job queue
#   4. Job definition
#   5. Job submission and monitoring
#   6. Job output retrieval from CloudWatch Logs
#   7. Cleanup in dependency order
#
# Requirements: bash, AWS CLI (configured credentials/region), openssl.

# -e: exit on any unhandled error.
# -E: let the ERR trap be inherited by functions and subshells; without it
#     the trap never fires for failures inside main(), so nothing is cleaned up.
set -eE

# AWS CLI v2 pipes uncaptured output through a pager, which would block an
# unattended script. Disable it.
export AWS_PAGER=""

# Configuration
SCRIPT_NAME="batch-fargate-tutorial"
LOG_FILE="${SCRIPT_NAME}-$(date +%Y%m%d-%H%M%S).log"
RANDOM_SUFFIX=$(openssl rand -hex 6)
COMPUTE_ENV_NAME="batch-fargate-compute-${RANDOM_SUFFIX}"
JOB_QUEUE_NAME="batch-fargate-queue-${RANDOM_SUFFIX}"
JOB_DEF_NAME="batch-fargate-jobdef-${RANDOM_SUFFIX}"
JOB_NAME="batch-hello-world-${RANDOM_SUFFIX}"
ROLE_NAME="BatchEcsTaskExecutionRole-${RANDOM_SUFFIX}"
TRUST_POLICY_FILE="batch-trust-policy-${RANDOM_SUFFIX}.json"
EXECUTION_POLICY_ARN="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"

# Resources created so far, as "TYPE:identifier" entries in creation order.
# cleanup_resources walks this list backwards.
CREATED_RESOURCES=()

#######################################
# Write a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
#######################################
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

#######################################
# ERR trap handler: report the failing line, clean up, and exit non-zero.
# Arguments: $1 - line number of the failing command
#######################################
handle_error() {
  local line=$1

  # Disarm the trap and errexit so a failure during cleanup cannot re-enter
  # this handler or abort the cleanup half-way.
  trap - ERR
  set +e

  # With errtrace the trap also fires inside command substitutions. There we
  # only propagate the failure; the parent shell's trap performs the cleanup
  # exactly once.
  if ((BASH_SUBSHELL > 0)); then
    exit 1
  fi

  log "ERROR: Script failed at line $line"
  log "Attempting to clean up resources created so far..."
  cleanup_resources
  exit 1
}

# Set up error handling
trap 'handle_error $LINENO' ERR

#######################################
# Poll a compute environment until its status is VALID.
# Gives up after 60 polls of 10 seconds (about 10 minutes).
# Arguments: $1 - compute environment name
# Returns:   0 when VALID; 1 on INVALID, API failure, or timeout
#######################################
wait_for_compute_env() {
  local env_name=$1
  local max_attempts=60
  local attempt=0
  local status

  log "Waiting for compute environment $env_name to be VALID..."

  while ((attempt < max_attempts)); do
    status=$(aws batch describe-compute-environments \
      --compute-environments "$env_name" \
      --query 'computeEnvironments[0].status' \
      --output text 2>/dev/null || echo "NOT_FOUND")

    case "$status" in
      VALID)
        log "Compute environment $env_name is ready"
        return 0
        ;;
      INVALID | NOT_FOUND)
        log "ERROR: Compute environment $env_name failed to create properly"
        return 1
        ;;
    esac

    log "Compute environment status: $status. Waiting 10 seconds..."
    sleep 10
    attempt=$((attempt + 1))
  done

  log "ERROR: Timed out waiting for compute environment $env_name"
  return 1
}

#######################################
# Poll a job queue until its status is VALID.
# Note: the queue's `state` (ENABLED/DISABLED) is simply what was requested at
# creation time; `status` is the field that reports whether the queue is
# actually usable.
# Gives up after 60 polls of 10 seconds (about 10 minutes).
# Arguments: $1 - job queue name
# Returns:   0 when VALID; 1 on INVALID, API failure, or timeout
#######################################
wait_for_job_queue() {
  local queue_name=$1
  local max_attempts=60
  local attempt=0
  local status

  log "Waiting for job queue $queue_name to be VALID..."

  while ((attempt < max_attempts)); do
    status=$(aws batch describe-job-queues \
      --job-queues "$queue_name" \
      --query 'jobQueues[0].status' \
      --output text 2>/dev/null || echo "NOT_FOUND")

    case "$status" in
      VALID)
        log "Job queue $queue_name is ready"
        return 0
        ;;
      INVALID | NOT_FOUND)
        log "ERROR: Job queue $queue_name failed to create properly"
        return 1
        ;;
    esac

    log "Job queue status: $status. Waiting 10 seconds..."
    sleep 10
    attempt=$((attempt + 1))
  done

  log "ERROR: Timed out waiting for job queue $queue_name"
  return 1
}

#######################################
# Poll a job until it reaches SUCCEEDED or FAILED.
# Gives up after 60 polls of 30 seconds (about 30 minutes) so a job stuck in
# RUNNABLE cannot hang the script forever.
# Arguments: $1 - job ID
# Returns:   0 on SUCCEEDED; 1 on FAILED or timeout
#######################################
wait_for_job() {
  local job_id=$1
  local max_attempts=60
  local attempt=0
  local status reason

  log "Waiting for job $job_id to complete..."

  while ((attempt < max_attempts)); do
    status=$(aws batch describe-jobs \
      --jobs "$job_id" \
      --query 'jobs[0].status' \
      --output text 2>/dev/null || echo "NOT_FOUND")

    case "$status" in
      SUCCEEDED)
        log "Job $job_id completed successfully"
        return 0
        ;;
      FAILED)
        reason=$(aws batch describe-jobs \
          --jobs "$job_id" \
          --query 'jobs[0].statusReason' \
          --output text 2>/dev/null || echo "unknown")
        log "ERROR: Job $job_id failed"
        log "Job status reason: $reason"
        return 1
        ;;
    esac

    log "Job status: $status. Waiting 30 seconds..."
    sleep 30
    attempt=$((attempt + 1))
  done

  log "ERROR: Timed out waiting for job $job_id"
  return 1
}

#######################################
# Wait for a Batch resource to settle in a requested state before deletion.
# Arguments:
#   $1 - resource type: JOB_QUEUE or COMPUTE_ENV
#   $2 - resource name
#   $3 - expected state: ENABLED or DISABLED (matched against the resource's
#        `state` field once its `status` is no longer transitional), or
#        DELETED (succeeds once the resource is no longer listed)
# Returns: 0 when the expected state is reached; 1 on timeout (30 polls of
#          10 seconds), unknown type, or when the resource vanished while a
#          state other than DELETED was expected
#######################################
wait_for_resource_state() {
  local resource_type=$1
  local resource_name=$2
  local expected_state=$3
  local max_attempts=30
  local attempt=0
  local description state status

  log "Waiting for $resource_type $resource_name to reach state: $expected_state"

  while ((attempt < max_attempts)); do
    case "$resource_type" in
      JOB_QUEUE)
        description=$(aws batch describe-job-queues \
          --job-queues "$resource_name" \
          --query 'jobQueues[0].[state,status]' \
          --output text 2>/dev/null || echo "NOT_FOUND")
        ;;
      COMPUTE_ENV)
        description=$(aws batch describe-compute-environments \
          --compute-environments "$resource_name" \
          --query 'computeEnvironments[0].[state,status]' \
          --output text 2>/dev/null || echo "NOT_FOUND")
        ;;
      *)
        log "WARNING: Unknown resource type: $resource_type"
        return 1
        ;;
    esac

    # Text output is "<state><TAB><status>", or "None" when nothing is listed.
    state=""
    status=""
    read -r state status <<<"$description" || true

    if [[ "$state" == "None" || "$status" == "DELETED" ]]; then
      if [[ "$expected_state" == "DELETED" ]]; then
        log "$resource_type $resource_name has been deleted"
        return 0
      fi
      log "WARNING: $resource_type $resource_name no longer exists"
      return 1
    fi

    if [[ "$state" == "$expected_state" ]]; then
      case "$status" in
        CREATING | UPDATING | DELETING)
          # The state change was accepted but is still being applied.
          ;;
        *)
          log "$resource_type $resource_name is now in state: $expected_state"
          return 0
          ;;
      esac
    fi

    log "$resource_type $resource_name state: $state, status: $status (waiting for $expected_state)"
    sleep 10
    attempt=$((attempt + 1))
  done

  log "WARNING: $resource_type $resource_name did not reach expected state after $max_attempts attempts"
  return 1
}

#######################################
# Best-effort removal of everything recorded in CREATED_RESOURCES, in reverse
# creation order. Individual failures are logged and do not stop the cleanup.
# Globals: CREATED_RESOURCES (read, then emptied), EXECUTION_POLICY_ARN (read)
#######################################
cleanup_resources() {
  local i resource resource_type resource_name

  log "Starting cleanup of created resources..."

  for ((i = ${#CREATED_RESOURCES[@]} - 1; i >= 0; i--)); do
    resource="${CREATED_RESOURCES[i]}"
    # Split on the first colon only: identifiers such as "name:revision"
    # contain colons themselves.
    resource_type="${resource%%:*}"
    resource_name="${resource#*:}"

    log "Cleaning up $resource_type: $resource_name"

    case "$resource_type" in
      JOB_DEF)
        aws batch deregister-job-definition \
          --job-definition "$resource_name" >/dev/null 2>&1 ||
          log "WARNING: Could not deregister job definition $resource_name"
        ;;
      JOB_QUEUE)
        aws batch update-job-queue \
          --job-queue "$resource_name" --state DISABLED >/dev/null 2>&1 || true
        wait_for_resource_state "JOB_QUEUE" "$resource_name" "DISABLED" || true
        if aws batch delete-job-queue \
          --job-queue "$resource_name" >/dev/null 2>&1; then
          # Deletion is asynchronous, and the compute environment cannot be
          # deleted while a queue still references it.
          wait_for_resource_state "JOB_QUEUE" "$resource_name" "DELETED" || true
        else
          log "WARNING: Could not delete job queue $resource_name"
        fi
        ;;
      COMPUTE_ENV)
        aws batch update-compute-environment \
          --compute-environment "$resource_name" --state DISABLED >/dev/null 2>&1 || true
        wait_for_resource_state "COMPUTE_ENV" "$resource_name" "DISABLED" || true
        aws batch delete-compute-environment \
          --compute-environment "$resource_name" >/dev/null 2>&1 ||
          log "WARNING: Could not delete compute environment $resource_name"
        ;;
      IAM_ROLE)
        aws iam detach-role-policy \
          --role-name "$resource_name" \
          --policy-arn "$EXECUTION_POLICY_ARN" >/dev/null 2>&1 || true
        aws iam delete-role --role-name "$resource_name" >/dev/null 2>&1 ||
          log "WARNING: Could not delete IAM role $resource_name"
        ;;
      FILE)
        rm -f -- "$resource_name" 2>/dev/null || true
        ;;
    esac
  done

  # Nothing is left to clean up; make a repeated call a no-op.
  CREATED_RESOURCES=()

  log "Cleanup completed"
}

#######################################
# Run the tutorial end to end, then offer to clean up.
# Globals: reads the configuration variables above; appends to
#          CREATED_RESOURCES as resources are created.
#######################################
main() {
  local subnet_ids=()

  log "Starting AWS Batch Fargate tutorial script - Fixed Version"
  log "Log file: $LOG_FILE"

  # Get AWS account ID
  log "Getting AWS account ID..."
  ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
  log "Account ID: $ACCOUNT_ID"

  # Get default VPC and subnets
  log "Getting default VPC and subnets..."
  DEFAULT_VPC=$(aws ec2 describe-vpcs \
    --filters "Name=is-default,Values=true" \
    --query 'Vpcs[0].VpcId' \
    --output text)

  if [[ -z "$DEFAULT_VPC" || "$DEFAULT_VPC" == "None" || "$DEFAULT_VPC" == "null" ]]; then
    log "ERROR: No default VPC found. Please create a VPC first."
    exit 1
  fi

  log "Default VPC: $DEFAULT_VPC"

  # Get subnets in the default VPC
  SUBNETS=$(aws ec2 describe-subnets \
    --filters "Name=vpc-id,Values=$DEFAULT_VPC" \
    --query 'Subnets[*].SubnetId' \
    --output text)

  if [[ -z "$SUBNETS" || "$SUBNETS" == "None" ]]; then
    log "ERROR: No subnets found in default VPC"
    exit 1
  fi

  # Convert the whitespace-separated subnet IDs to a JSON array body.
  # read returns non-zero at end of input when -d '' is used, hence || true.
  read -r -d '' -a subnet_ids <<<"$SUBNETS" || true
  if ((${#subnet_ids[@]} == 0)); then
    log "ERROR: No subnets found in default VPC"
    exit 1
  fi
  SUBNET_ARRAY=$(printf '"%s",' "${subnet_ids[@]}")
  SUBNET_ARRAY=${SUBNET_ARRAY%,}

  log "Subnets: $SUBNETS"
  log "Subnet array: [$SUBNET_ARRAY]"

  # Get default security group for the VPC
  DEFAULT_SG=$(aws ec2 describe-security-groups \
    --filters "Name=vpc-id,Values=$DEFAULT_VPC" "Name=group-name,Values=default" \
    --query 'SecurityGroups[0].GroupId' \
    --output text)

  if [[ -z "$DEFAULT_SG" || "$DEFAULT_SG" == "None" || "$DEFAULT_SG" == "null" ]]; then
    log "ERROR: No default security group found in VPC"
    exit 1
  fi

  log "Default security group: $DEFAULT_SG"

  # Step 1: Create IAM execution role
  log "Step 1: Creating IAM execution role..."

  # Create trust policy document
  cat >"$TRUST_POLICY_FILE" <<'EOF'
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ecs-tasks.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
EOF
  CREATED_RESOURCES+=("FILE:$TRUST_POLICY_FILE")

  # Create the role
  aws iam create-role \
    --role-name "$ROLE_NAME" \
    --assume-role-policy-document "file://$TRUST_POLICY_FILE"
  CREATED_RESOURCES+=("IAM_ROLE:$ROLE_NAME")

  # Attach policy
  aws iam attach-role-policy \
    --role-name "$ROLE_NAME" \
    --policy-arn "$EXECUTION_POLICY_ARN"

  log "IAM role created: $ROLE_NAME"

  # A freshly created role is not immediately visible to other services.
  log "Waiting for IAM role propagation (15 seconds)..."
  sleep 15

  # Step 2: Create compute environment
  log "Step 2: Creating Fargate compute environment..."
  aws batch create-compute-environment \
    --compute-environment-name "$COMPUTE_ENV_NAME" \
    --type MANAGED \
    --state ENABLED \
    --compute-resources "{
      \"type\": \"FARGATE\",
      \"maxvCpus\": 256,
      \"subnets\": [$SUBNET_ARRAY],
      \"securityGroupIds\": [\"$DEFAULT_SG\"]
    }"
  CREATED_RESOURCES+=("COMPUTE_ENV:$COMPUTE_ENV_NAME")

  # Wait for compute environment to be ready
  wait_for_compute_env "$COMPUTE_ENV_NAME"

  # Step 3: Create job queue
  log "Step 3: Creating job queue..."
  aws batch create-job-queue \
    --job-queue-name "$JOB_QUEUE_NAME" \
    --state ENABLED \
    --priority 900 \
    --compute-environment-order order=1,computeEnvironment="$COMPUTE_ENV_NAME"
  CREATED_RESOURCES+=("JOB_QUEUE:$JOB_QUEUE_NAME")

  # Wait for job queue to be ready
  wait_for_job_queue "$JOB_QUEUE_NAME"

  # Step 4: Create job definition
  log "Step 4: Creating job definition..."
  JOB_DEF_REVISION=$(aws batch register-job-definition \
    --job-definition-name "$JOB_DEF_NAME" \
    --type container \
    --platform-capabilities FARGATE \
    --container-properties "{
      \"image\": \"busybox\",
      \"resourceRequirements\": [
        {\"type\": \"VCPU\", \"value\": \"0.25\"},
        {\"type\": \"MEMORY\", \"value\": \"512\"}
      ],
      \"command\": [\"echo\", \"hello world\"],
      \"networkConfiguration\": {
        \"assignPublicIp\": \"ENABLED\"
      },
      \"executionRoleArn\": \"arn:aws:iam::${ACCOUNT_ID}:role/${ROLE_NAME}\"
    }" \
    --query 'revision' \
    --output text)
  # Track the exact revision so cleanup can deregister it.
  CREATED_RESOURCES+=("JOB_DEF:${JOB_DEF_NAME}:${JOB_DEF_REVISION}")

  log "Job definition created: $JOB_DEF_NAME"

  # Step 5: Submit job
  log "Step 5: Submitting job..."
  JOB_ID=$(aws batch submit-job \
    --job-name "$JOB_NAME" \
    --job-queue "$JOB_QUEUE_NAME" \
    --job-definition "$JOB_DEF_NAME" \
    --query 'jobId' \
    --output text)

  log "Job submitted with ID: $JOB_ID"

  # Step 6: Wait for job completion and view output
  log "Step 6: Waiting for job completion..."
  wait_for_job "$JOB_ID"

  # Get log stream name
  log "Getting job logs..."
  LOG_STREAM=$(aws batch describe-jobs \
    --jobs "$JOB_ID" \
    --query 'jobs[0].attempts[0].taskProperties.containers[0].logStreamName' \
    --output text)

  # Fall back to the job-level container details when the attempt does not
  # carry task properties.
  if [[ -z "$LOG_STREAM" || "$LOG_STREAM" == "None" || "$LOG_STREAM" == "null" ]]; then
    LOG_STREAM=$(aws batch describe-jobs \
      --jobs "$JOB_ID" \
      --query 'jobs[0].container.logStreamName' \
      --output text)
  fi

  if [[ -n "$LOG_STREAM" && "$LOG_STREAM" != "None" && "$LOG_STREAM" != "null" ]]; then
    log "Log stream: $LOG_STREAM"
    log "Job output:"
    aws logs get-log-events \
      --log-group-name "/aws/batch/job" \
      --log-stream-name "$LOG_STREAM" \
      --query 'events[*].message' \
      --output text | tee -a "$LOG_FILE"
  else
    log "No log stream available for job"
  fi

  log "Tutorial completed successfully!"

  # Show created resources
  echo ""
  echo "==========================================="
  echo "CREATED RESOURCES"
  echo "==========================================="
  echo "The following resources were created:"
  for resource in "${CREATED_RESOURCES[@]}"; do
    echo "  - $resource"
  done

  echo ""
  echo "==========================================="
  echo "CLEANUP CONFIRMATION"
  echo "==========================================="
  echo "Do you want to clean up all created resources? (y/n): "
  # Treat end-of-input (e.g. a non-interactive run) as "no" instead of letting
  # the failed read abort the script.
  read -r CLEANUP_CHOICE || CLEANUP_CHOICE="n"

  if [[ "$CLEANUP_CHOICE" =~ ^[Yy]$ ]]; then
    cleanup_resources
    log "All resources have been cleaned up"
  else
    log "Resources left intact. You can clean them up manually later."
    echo "To clean up manually, run the following commands:"
    echo "aws batch deregister-job-definition --job-definition ${JOB_DEF_NAME}:${JOB_DEF_REVISION}"
    echo "aws batch update-job-queue --job-queue $JOB_QUEUE_NAME --state DISABLED"
    echo "aws batch delete-job-queue --job-queue $JOB_QUEUE_NAME"
    echo "aws batch update-compute-environment --compute-environment $COMPUTE_ENV_NAME --state DISABLED"
    echo "aws batch delete-compute-environment --compute-environment $COMPUTE_ENV_NAME"
    echo "aws iam detach-role-policy --role-name $ROLE_NAME --policy-arn $EXECUTION_POLICY_ARN"
    echo "aws iam delete-role --role-name $ROLE_NAME"
  fi
}

# Run main function
main "$@"
-
For API details, see the following topics in AWS CLI Command Reference.
-
Scenarios
Amazon Bedrock