if evaluation_mode == 'supervised':
    # Supervised evaluation: run every model endpoint against each labelled
    # dataset in `datasets` and keep the comparison results per dataset.
    print("=" * 60)
    print("JUMPSTART MODEL COMPARISON ACROSS DIFFERENT DATASETS")
    print("=" * 60)

    # Maps dataset name -> whatever compare_multiple_models() returned for it.
    all_results = {}

    # Helper functions to count labels in different formats. Labels are
    # compared via str(label).lower(), so 1, "1", "LABEL_1", True, ... all
    # count as positive (and likewise for the negative spellings).
    def count_positive_labels(labels):
        """Return how many entries of *labels* use a positive-class spelling."""
        return sum(
            1 for label in labels
            if str(label).lower() in ('label_1', '1', 'positive', 'true', 'yes')
        )

    def count_negative_labels(labels):
        """Return how many entries of *labels* use a negative-class spelling."""
        return sum(
            1 for label in labels
            if str(label).lower() in ('label_0', '0', 'negative', 'false', 'no')
        )

    # Run comparison for each dataset type
    for dataset_name, dataset in datasets.items():
        print(f"\n\n{'-' * 20} Testing with {dataset_name} Dataset {'-' * 20}")
        print(f"Dataset size: {len(dataset)} samples")

        # Labels matching neither spelling list are counted in neither total,
        # so the two counts need not add up to the dataset size.
        positive_count = count_positive_labels(dataset['true_label'])
        negative_count = count_negative_labels(dataset['true_label'])

        print(f"  - Positive samples: {positive_count}")
        print(f"  - Negative samples: {negative_count}")

        # Run the comparison
        results = compare_multiple_models(model_endpoints, dataset)

        # Store results for this dataset
        all_results[dataset_name] = results

        # Display results for this dataset
        print(f"\n{'-' * 20} {dataset_name} Dataset Results {'-' * 20}")
        display_comparison_results(results)

    print("Model comparison across all datasets completed!")

else:
    # UNSUPERVISED EVALUATION
    print("Starting Unsupervised Model Evaluation...")
    print("=" * 60)

    # Only the first entry of `datasets` is evaluated in this mode
    # (presumably a single custom dataset is supplied -- confirm with caller).
    dataset_name = list(datasets)[0]
    test_data = datasets[dataset_name]

    # Load the unsupervised helpers into the current namespace.
    # The file is read through a context manager so the handle is closed
    # deterministically, decoded as UTF-8 (Python's default source encoding)
    # rather than the locale encoding, and compiled with its real path so
    # tracebacks point at the file instead of "<string>".
    # NOTE(review): exec() runs the file's code in this namespace; a regular
    # import would be safer if the module is importable -- left as-is because
    # the helpers may rely on names defined here.
    unsupervised_source_path = 'src/unsupervised_evaluation.py'
    with open(unsupervised_source_path, encoding='utf-8') as source_file:
        unsupervised_source = source_file.read()
    exec(compile(unsupervised_source, unsupervised_source_path, 'exec'))

    # Run unsupervised evaluation
    model_results = evaluate_models_unsupervised(model_endpoints, test_data)
    unsupervised_metrics = calculate_unsupervised_metrics(model_results)

    # Store results in the same {dataset_name: ...} outer shape as the
    # supervised branch, with the raw per-model results and derived metrics.
    all_results = {
        dataset_name: {
            'model_results': model_results,
            'metrics': unsupervised_metrics
        }
    }

    print("Unsupervised evaluation completed!")
