|
|
@@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0
|
|
|
|
python finetune.py \
|
|
|
|
python finetune.py \
|
|
|
|
--model_name_or_path $MODEL \
|
|
|
|
--model_name_or_path $MODEL \
|
|
|
|
--data_path $DATA \
|
|
|
|
--data_path $DATA \
|
|
|
|
--bf16 True \
|
|
|
|
|
|
|
|
--output_dir output_qwen \
|
|
|
|
--output_dir output_qwen \
|
|
|
|
--num_train_epochs 16 \
|
|
|
|
--num_train_epochs 16 \
|
|
|
|
--per_device_train_batch_size 2 \
|
|
|
|
--per_device_train_batch_size 2 \
|
|
|
@@ -59,7 +58,9 @@ python finetune.py \
|
|
|
|
--model_max_length 3072 \
|
|
|
|
--model_max_length 3072 \
|
|
|
|
--lazy_preprocess True \
|
|
|
|
--lazy_preprocess True \
|
|
|
|
--gradient_checkpointing \
|
|
|
|
--gradient_checkpointing \
|
|
|
|
--use_lora
|
|
|
|
--use_lora \
|
|
|
|
|
|
|
|
--fp16 True \
|
|
|
|
|
|
|
|
--deepspeed finetune/ds_config_zero2.json
|
|
|
|
|
|
|
|
|
|
|
|
# If you use fp16 instead of bf16, you should use deepspeed
|
|
|
|
# If you use fp16 instead of bf16, you should use deepspeed
|
|
|
|
# --fp16 True --deepspeed finetune/ds_config_zero2.json
|
|
|
|
# --fp16 True --deepspeed finetune/ds_config_zero2.json
|
|
|
|