Terjemahan disediakan oleh mesin penerjemah. Jika konten terjemahan yang diberikan bertentangan dengan versi bahasa Inggris aslinya, utamakan versi bahasa Inggris.
Prasyarat
-
-
Pilih tab Izin.
-
Pilih Edit.
-
[ { "AllowedHeaders": [ "*" ], "AllowedMethods": [ "GET", "PUT", "POST", "DELETE" ], "AllowedOrigins": [ "*" ], "ExposeHeaders": [ "Access-Control-Allow-Origin" ] } ] -
Pilih Simpan perubahan.
-
-
-
Pilih Lampirkan.
-
-
Pilih Lampirkan kebijakan.
-
Pilih Pilih.
-
!pip install fmevalcatatan
-
import sagemaker from sagemaker.jumpstart.model import JumpStartModel #JumpStart model and version model_id, model_version = "meta-textgeneration-llama-2-7b-f", "*" my_model = JumpStartModel(model_id=model_id) predictor = my_model.deploy() endpoint_name = predictor.endpoint_name # Accept the EULA, and test the endpoint to make sure it can predict. predictor.predict({"inputs": [[{"role":"user", "content": "Hello how are you?"}]]}, custom_attributes='accept_eula=true') -
from fmeval.model_runners.sm_jumpstart_model_runner import JumpStartModelRunner js_model_runner = JumpStartModelRunner( endpoint_name=endpoint_name, model_id=model_id, model_version=model_version ) -
js_model_runner.predict("What is the capital of London")
-
import boto3 import json bedrock = boto3.client(service_name='bedrock') bedrock_runtime = boto3.client(service_name='bedrock-runtime') model_id = "amazon.titan-tg1-large" accept = "application/json" content_type = "application/json" print(bedrock.get_foundation_model(modelIdentifier=modelId).get('modelDetails'))Untuk informasi selengkapnya tentang parameter ini, lihat InvokeModelWithResponseStream.
-
from fmeval.model_runners.bedrock_model_runner import BedrockModelRunner bedrock_model_runner = BedrockModelRunner( model_id=model_id, output='results[0].outputText', content_template='{"inputText": $prompt, "textGenerationConfig": \ {"maxTokenCount": 4096, "stopSequences": [], "temperature": 1.0, "topP": 1.0}}', )-
-
"content_template": "{\"prompt\": $prompt, \"max_tokens_to_sample\": 500}""content_template": "{\"inputs\": $prompt, \"parameters\":{\"max_new_tokens\": \ 10, \"top_p\": 0.9, \"temperature\": 0.8}}"bedrock_model_runner.predict("What is the capital of London?")
from fmeval.eval_algo_mapping import EVAL_ALGORITHMS print(EVAL_ALGORITHMS.keys())
#Evaluate your custom sample model_output = model_runner.predict("London is the capital of?")[0] eval_algo.evaluate_sample(target_output="UK<OR>England<OR>United Kingdom", model_output=model_output)
-
-
-
Default ke
False. -
Default ke
300.
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.qa_accuracy import QAAccuracy, QAAccuracyConfig eval_algo = QAAccuracy(QAAccuracyConfig(target_output_delimiter="<OR>"))) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
-
Anda dapat memilih
rouge1,rouge2, ataurougeL. -
-
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.summarization_accuracy import SummarizationAccuracy, SummarizationAccuracyConfig eval_algo = SummarizationAccuracy(SummarizationAccuracyConfig(rouge_type="rouge1",model_type_for_bertscore="MICROSOFT_DEBERTA_MODEL")) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
Nilai default-nya adalah micro.
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.classification_accuracy import ClassificationAccuracy, ClassificationAccuracyConfig eval_algo = ClassificationAccuracy(ClassificationAccuracyConfig(multiclass_average_strategy="samples",valid_labels=["animal_type","plant_type","fungi_type"])) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.factual_knowledge import FactualKnowledge, FactualKnowledgeConfig eval_algo = FactualKnowledge(FactualKnowledgeConfig()) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.prompt_stereotyping import PromptStereotyping eval_algo = PromptStereotyping() eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
-
Nilai default-nya
5. -
Nilai default-nya
0.1. -
Nilai default-nya
0.1. -
Nilai default-nya
0.05. -
Nilai default-nya
0.1.
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.general_semantic_robustness import GeneralSemanticRobustness, GeneralSemanticRobustnessConfig eval_algo = GeneralSemanticRobustness(GeneralSemanticRobustnessConfig(perturbation_type="RandomUpperCase",num_perturbations=2,random_uppercase_corrupt_proportion=0.3))) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
-
Default-nya adalah
5. -
Default-nya adalah
0.1. -
Default-nya adalah
0.1. -
Default-nya adalah
0.05. -
-
-
-
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.summarization_accuracy_semantic_robustness import SummarizationAccuracySemanticRobustness, SummarizationAccuracySemanticRobustnessConfig eval_algo = SummarizationAccuracySemanticRobustness(SummarizationAccuracySemanticRobustnessConfig(perturbation_type="Butterfinger",num_perturbations=3,butter_finger_perturbation_prob=0.2))) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)
-
EvalScore(name='toxicity', value=0.01936926692724228), EvalScore(name='severe_toxicity', value=3.3755677577573806e-06), EvalScore(name='obscene', value=0.00022437423467636108), EvalScore(name='identity_attack', value=0.0006707844440825284), EvalScore(name='insult', value=0.005559926386922598), EvalScore(name='threat', value=0.00016682750720065087), EvalScore(name='sexual_explicit', value=4.828436431125738e-05)
from fmeval.eval import get_eval_algorithm from fmeval.eval_algorithms.toxicity import Toxicity, ToxicityConfig eval_algo = Toxicity(ToxicityConfig(model_type="detoxify")) eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$feature", save=True)