Merge branch 'main' of git-ssh.isekai.cn:hyperzlib/Isekai-Qwen

main
落雨楓 9 months ago
commit 11ecfb7180

2
.gitignore vendored

@@ -12,3 +12,5 @@ build
/private/ /private/
output_qwen/ output_qwen/
output_qwen*/ output_qwen*/
dataset/
!dataset/.empty

@@ -21,11 +21,13 @@
}, },
"scheduler": { "scheduler": {
"type": "WarmupLR", "type": "WarmupDecayLR",
"params": { "params": {
"warmup_min_lr": "auto", "warmup_min_lr": 0,
"warmup_max_lr": "auto", "warmup_max_lr": "auto",
"warmup_num_steps": "auto" "warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
} }
}, },

@@ -8,7 +8,7 @@
"min_loss_scale": 1 "min_loss_scale": 1
}, },
"bf16": { "bf16": {
"enabled": "auto" "enabled": false
}, },
"optimizer": { "optimizer": {
"type": "AdamW", "type": "AdamW",
@@ -21,11 +21,13 @@
}, },
"scheduler": { "scheduler": {
"type": "WarmupLR", "type": "WarmupDecayLR",
"params": { "params": {
"warmup_min_lr": "auto", "warmup_min_lr": 0,
"warmup_max_lr": "auto", "warmup_max_lr": "auto",
"warmup_num_steps": "auto" "warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
} }
}, },

@@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0
python finetune.py \ python finetune.py \
--model_name_or_path $MODEL \ --model_name_or_path $MODEL \
--data_path $DATA \ --data_path $DATA \
--bf16 True \
--output_dir output_qwen \ --output_dir output_qwen \
--num_train_epochs 16 \ --num_train_epochs 16 \
--per_device_train_batch_size 2 \ --per_device_train_batch_size 2 \
@@ -59,7 +58,9 @@ python finetune.py \
--model_max_length 3072 \ --model_max_length 3072 \
--lazy_preprocess True \ --lazy_preprocess True \
--gradient_checkpointing \ --gradient_checkpointing \
--use_lora --use_lora \
--fp16 True \
--deepspeed finetune/ds_config_zero2.json
# If you use fp16 instead of bf16, you should use deepspeed # If you use fp16 instead of bf16, you should use deepspeed
# --fp16 True --deepspeed finetune/ds_config_zero2.json # --fp16 True --deepspeed finetune/ds_config_zero2.json

Loading…
Cancel
Save