Merge branch 'main' of git-ssh.isekai.cn:hyperzlib/Isekai-Qwen

main
落雨楓 9 months ago
commit 11ecfb7180

2
.gitignore vendored

@@ -12,3 +12,5 @@ build
/private/
output_qwen/
output_qwen*/
dataset/
!dataset/.empty

@@ -21,11 +21,13 @@
},
"scheduler": {
"type": "WarmupLR",
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": "auto",
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
"warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
}
},

@@ -8,7 +8,7 @@
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
"enabled": false
},
"optimizer": {
"type": "AdamW",
@@ -21,11 +21,13 @@
},
"scheduler": {
"type": "WarmupLR",
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": "auto",
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
"warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
}
},

@@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0
python finetune.py \
--model_name_or_path $MODEL \
--data_path $DATA \
--bf16 True \
--output_dir output_qwen \
--num_train_epochs 16 \
--per_device_train_batch_size 2 \
@@ -59,7 +58,9 @@ python finetune.py \
--model_max_length 3072 \
--lazy_preprocess True \
--gradient_checkpointing \
--use_lora
--use_lora \
--fp16 True \
--deepspeed finetune/ds_config_zero2.json
# If you use fp16 instead of bf16, you should use deepspeed
# --fp16 True --deepspeed finetune/ds_config_zero2.json

Loading…
Cancel
Save