General configuration
```yaml
defaults:
  - _self_
  - cluster: slurm
  - recipes: training/llama/hf_llama3_8b_seq8192_gpu

instance_type: p5.48xlarge

git:
  repo_url_or_path: null
  branch: null
  commit: null
  entry_script: null
  token: null

env_vars:
  NCCL_DEBUG: WARN
```
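The git block lets a recipe pull training code from a custom repository, and env_vars injects environment variables into the job. As a sketch, the values below show what a filled-in override might look like; the repository URL, branch, and NCCL_DEBUG level are illustrative placeholders, not recipe defaults.

```yaml
# Hypothetical override -- all values below are placeholders.
git:
  repo_url_or_path: https://github.com/your-org/your-training-fork.git
  branch: experiments/seq8192
  commit: null
  entry_script: null
  token: null

env_vars:
  NCCL_DEBUG: INFO   # more verbose NCCL logging than the WARN default above
```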
run
```yaml
run:
  name: llama-8b
  results_dir: ${base_results_dir}/${.name}
  time_limit: "6-00:00:00"
  model_type: hf
```
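time_limit follows the Slurm duration format days-hours:minutes:seconds, so "6-00:00:00" caps the job at six days. A shorter smoke-test variant might look like the sketch below; the run name and two-hour limit are illustrative.

```yaml
# Sketch: a short validation run (placeholder name and time limit).
run:
  name: llama-8b-smoke-test
  results_dir: ${base_results_dir}/${.name}
  time_limit: "0-02:00:00"   # 2 hours in Slurm D-HH:MM:SS notation
  model_type: hf
```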
```yaml
exp_manager:
  exp_dir: null
  name: experiment
  create_tensorboard_logger: True
```
```yaml
exp_manager:
  checkpoint_dir: ${recipes.exp_manager.exp_dir}/checkpoints/
  auto_checkpoint:
    enabled: True
```
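Since auto_checkpoint.enabled is a plain boolean, opting out of automatic checkpointing is a one-key change, as in this minimal sketch (all other exp_manager keys keep their values from above):

```yaml
# Sketch: turn off automatic checkpointing only.
exp_manager:
  auto_checkpoint:
    enabled: False
```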
```yaml
exp_manager:
  checkpoint_callback_params:
    # Set save_top_k = 0 to disable sharded checkpointing
    save_top_k: 3
    every_n_train_steps: 10
    monitor: "step"
    mode: "max"
    save_last: False
  resume_from_checkpoint: ${recipes.exp_manager.exp_dir}/checkpoints/
```
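Per the inline comment, setting save_top_k to 0 disables sharded checkpointing altogether; a minimal override under that assumption:

```yaml
# Sketch: disable sharded checkpointing, per the inline comment above.
exp_manager:
  checkpoint_callback_params:
    save_top_k: 0
```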
```yaml
exp_manager:
  export_full_model:
    # Set every_n_train_steps = 0 to disable full checkpointing
    every_n_train_steps: 0
    save_last: True
    final_export_dir: null
```
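Conversely, the inline comment implies that a positive every_n_train_steps enables full-model export. The interval and export directory in this sketch are placeholders, not recipe defaults.

```yaml
# Sketch: export the full (unsharded) model every 50 steps.
# The step interval and directory are illustrative placeholders.
exp_manager:
  export_full_model:
    every_n_train_steps: 50
    save_last: True
    final_export_dir: /fsx/exports/llama-8b
```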
```yaml
model:
  model_type: llama_v3
  # Base configs
  train_batch_size: 4
  val_batch_size: 1
  seed: 12345
  grad_clip: 1.0
  # Model parallelism
  tensor_model_parallel_degree: 4
  expert_model_parallel_degree: 1
  context_parallel_degree: 2
```
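The parallel degrees multiply: with tensor_model_parallel_degree: 4 and context_parallel_degree: 2, each model replica spans 4 × 2 = 8 GPUs, which is exactly one p5.48xlarge node (8 GPUs); any additional nodes are then consumed by data parallelism. An alternative split for the same node might look like the sketch below (illustrative, not a recipe default):

```yaml
# Sketch: trade context parallelism for more tensor parallelism on one 8-GPU node.
model:
  tensor_model_parallel_degree: 8
  expert_model_parallel_degree: 1
  context_parallel_degree: 1
```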
FP8
```yaml
model:
  # FP8 config
  fp8: True
  fp8_amax_history_len: 1024
  fp8_amax_compute_algo: max
```
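fp8_amax_history_len and fp8_amax_compute_algo appear to map to NVIDIA Transformer Engine's delayed-scaling scheme, and FP8 training requires Hopper-class GPUs such as the H100s in p5.48xlarge. On hardware without FP8 support, the flag can simply be turned off, as in this sketch:

```yaml
# Sketch: fall back to the recipe's non-FP8 mixed precision.
model:
  fp8: False
```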
data
Data files are expected in the following format:

- JSON
```yaml
model:
  data:
    train_dir: /path/to/your/train/data
    val_dir: /path/to/your/val/data
    dataset_type: hf
    use_synthetic_data: False
```
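For a quick end-to-end infrastructure check without preparing real data, use_synthetic_data can be flipped on. The sketch below assumes the directory paths are ignored in that mode, which is an assumption rather than documented behavior:

```yaml
# Sketch: smoke-test the training stack with generated data.
# Assumes train_dir/val_dir are ignored when synthetic data is enabled.
model:
  data:
    train_dir: null
    val_dir: null
    dataset_type: hf
    use_synthetic_data: True
```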
```yaml
model:
  # Fine tuning config
  do_finetune: True
  # The path to resume from, needs to be HF compatible
  hf_model_name_or_path: null
  hf_access_token: null
  # PEFT config
  peft:
    peft_type: lora
    rank: 32
    alpha: 16
    dropout: 0.1
```
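Putting the fine-tuning keys together, a LoRA run needs an HF-compatible base model and, for gated models, an access token. In the sketch below the model ID is only an example, and the token is read from the environment via OmegaConf's env resolver, which assumes the launcher resolves the config with Hydra/OmegaConf:

```yaml
# Sketch: LoRA fine-tuning from a Hugging Face base model.
# The model ID is an example; avoid hardcoding a real token in the config.
model:
  do_finetune: True
  hf_model_name_or_path: meta-llama/Meta-Llama-3-8B   # example HF model ID
  hf_access_token: ${oc.env:HF_TOKEN}                 # assumes OmegaConf env resolver is available
  peft:
    peft_type: lora
    rank: 32
    alpha: 16
    dropout: 0.1
```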