@@ -35,7 +35,7 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
     --save_strategy "steps" \
     --save_steps 1000 \
     --save_total_limit 10 \
-    --learning_rate 1e-5 \
+    --learning_rate 3e-4 \
     --weight_decay 0.1 \
     --adam_beta2 0.95 \
     --warmup_ratio 0.01 \
@@ -47,4 +47,4 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
     --use_lora \
     --q_lora \
     --gradient_checkpointing \
-    --deepspeed finetune/ds_config_zero2.json
+    --deepspeed finetune/ds_config_zero2.json
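For reference, a minimal sketch of the command the two hunks modify, after the change; only the flags visible in the diff are taken from it. $DISTRIBUTED_ARGS and the script arguments that precede line 35 or fall between the two hunks (lines 42-46) are assumed to be set elsewhere in the script and are omitted here.

# Sketch only: arguments not shown in the diff are intentionally left out;
# $DISTRIBUTED_ARGS is assumed to be defined earlier in the script.
torchrun $DISTRIBUTED_ARGS finetune.py \
    --save_strategy "steps" \
    --save_steps 1000 \
    --save_total_limit 10 \
    --learning_rate 3e-4 \
    --weight_decay 0.1 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.01 \
    --use_lora \
    --q_lora \
    --gradient_checkpointing \
    --deepspeed finetune/ds_config_zero2.json

The learning-rate bump from 1e-5 to 3e-4 is consistent with common practice: LoRA/QLoRA adapters are typically trained with a considerably higher learning rate than full-parameter fine-tuning.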