Terjemahan disediakan oleh mesin penerjemah. Jika konten terjemahan yang diberikan bertentangan dengan versi bahasa Inggris aslinya, utamakan versi bahasa Inggris.
Siapkan lingkungan Anda
Prasyarat
-
!pip install fmeval !pip3 install boto3==1.28.65 -
import boto3 import json import os # Dependent on available hardware and memory os.environ["PARALLELIZATION_FACTOR"] = "1" # Bedrock clients for model inference bedrock = boto3.client(service_name='bedrock') bedrock_runtime = boto3.client(service_name='bedrock-runtime') -
-
import glob # Check for the built-in dataset if not glob.glob("sample-dataset.jsonl"): print("ERROR - please make sure file exists: sample-dataset.jsonl")
-
import json model_id = 'anthropic.claude-v2' accept = "application/json" contentType = "application/json" # Ensure that your prompt has the correct format prompt_data = """Human: Who is Barack Obama? Assistant: """ -
body = json.dumps({"prompt": prompt_data, "max_tokens_to_sample": 500}) response = bedrock_runtime.invoke_model( body=body, modelId=model_id, accept=accept, contentType=contentType ) response_body = json.loads(response.get("body").read()) print(response_body.get("completion")) -
-
from fmeval.data_loaders.data_config import DataConfig from fmeval.model_runners.bedrock_model_runner import BedrockModelRunner from fmeval.constants import MIME_TYPE_JSONLINES from fmeval.eval_algorithms.summarization_accuracy import SummarizationAccuracy, SummarizationAccuracyConfig -
{ "document": "23 October 2015 Last updated at 17:44 BST\nIt's the highest rating a tropical storm can get and is the first one of this magnitude to hit mainland Mexico since 1959.\nBut how are the categories decided and what do they mean? Newsround reporter Jenny Lawrence explains.", "summary": "Hurricane Patricia has been rated as a category 5 storm.", "id": "34615665", }config = DataConfig( dataset_name="sample-dataset", dataset_uri="sample-dataset.jsonl", dataset_mime_type=MIME_TYPE_JSONLINES, model_input_location="document", target_output_location="summary" ) -
bedrock_model_runner = BedrockModelRunner( model_id=model_id, output='completion', content_template='{"prompt": $prompt, "max_tokens_to_sample": 500}' )-
-
"content_template": "{\"prompt\": $prompt, \"max_tokens_to_sample\": 500}""content_template": "{\"inputs\": $prompt, \"parameters\":{\"max_new_tokens\": \ 10, \"top_p\": 0.9, \"temperature\": 0.8}}" -
-
-
eval_algo = SummarizationAccuracy(SummarizationAccuracyConfig()) -
eval_output = eval_algo.evaluate(model=bedrock_model_runner, dataset_config=config, prompt_template="Human: $feature\n\nAssistant:\n", save=True)
-
# parse report print(json.dumps(eval_output, default=vars, indent=4))[ { "eval_name": "summarization_accuracy", "dataset_name": "sample-dataset", "dataset_scores": [ { "name": "meteor", "value": 0.2048823008681274 }, { "name": "rouge", "value": 0.03557697913367101 }, { "name": "bertscore", "value": 0.5406564395678671 } ], "prompt_template": "Human: $feature\n\nAssistant:\n", "category_scores": null, "output_path": "/tmp/eval_results/summarization_accuracy_sample_dataset.jsonl", "error": null } ] -
import pandas as pd data = [] with open("/tmp/eval_results/summarization_accuracy_sample_dataset.jsonl", "r") as file: for line in file: data.append(json.loads(line)) df = pd.DataFrame(data) df['meteor_score'] = df['scores'].apply(lambda x: x[0]['value']) df['rouge_score'] = df['scores'].apply(lambda x: x[1]['value']) df['bert_score'] = df['scores'].apply(lambda x: x[2]['value']) dfmodel_input model_output target_output prompt scores meteor_score rouge_score bert_score 0 John Edward Bates, formerly of Spalding, Linco... I cannot make any definitive judgments, as th... A former Lincolnshire Police officer carried o... Human: John Edward Bates, formerly of Spalding... [{'name': 'meteor', 'value': 0.112359550561797... 0.112360 0.000000 0.543234 ... 1 23 October 2015 Last updated at 17:44 BST\nIt'... Here are some key points about hurricane/trop... Hurricane Patricia has been rated as a categor... Human: 23 October 2015 Last updated at 17:44 B... [{'name': 'meteor', 'value': 0.139822692925566... 0.139823 0.017621 0.426529 ... 2 Ferrari appeared in a position to challenge un... Here are the key points from the article:\n\n... Lewis Hamilton stormed to pole position at the... Human: Ferrari appeared in a position to chall... [{'name': 'meteor', 'value': 0.283411142234671... 0.283411 0.064516 0.597001 ... 3 The Bath-born player, 28, has made 36 appearan... Okay, let me summarize the key points from th... Newport Gwent Dragons number eight Ed Jackson ... Human: The Bath-born player, 28, has made 36 a... [{'name': 'meteor', 'value': 0.089020771513353... 0.089021 0.000000 0.533514 ... ...