diff --git a/.gitignore b/.gitignore index 03677b0..e18fde1 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,6 @@ build /private/ output_qwen/ -output_qwen*/ \ No newline at end of file +output_qwen*/ +dataset/* +!dataset/.empty \ No newline at end of file diff --git a/finetune/ds_config_zero2.json b/finetune/ds_config_zero2.json index 4be2c0b..420cefe 100644 --- a/finetune/ds_config_zero2.json +++ b/finetune/ds_config_zero2.json @@ -21,11 +21,13 @@ }, "scheduler": { - "type": "WarmupLR", + "type": "WarmupDecayLR", "params": { - "warmup_min_lr": "auto", + "warmup_min_lr": 0, "warmup_max_lr": "auto", - "warmup_num_steps": "auto" + "warmup_num_steps": "auto", + "warmup_type": "linear", + "total_num_steps": "auto" } }, diff --git a/finetune/ds_config_zero3.json b/finetune/ds_config_zero3.json index e30fe94..52c53fd 100644 --- a/finetune/ds_config_zero3.json +++ b/finetune/ds_config_zero3.json @@ -8,7 +8,7 @@ "min_loss_scale": 1 }, "bf16": { - "enabled": "auto" + "enabled": false }, "optimizer": { "type": "AdamW", @@ -21,11 +21,13 @@ }, "scheduler": { - "type": "WarmupLR", + "type": "WarmupDecayLR", "params": { - "warmup_min_lr": "auto", + "warmup_min_lr": 0, "warmup_max_lr": "auto", - "warmup_num_steps": "auto" + "warmup_num_steps": "auto", + "warmup_type": "linear", + "total_num_steps": "auto" } }, diff --git a/finetune/finetune_lora_single_gpu.sh b/finetune/finetune_lora_single_gpu.sh old mode 100644 new mode 100755 index 7c99e4b..7fec27a --- a/finetune/finetune_lora_single_gpu.sh +++ b/finetune/finetune_lora_single_gpu.sh @@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0 python finetune.py \ --model_name_or_path $MODEL \ --data_path $DATA \ - --bf16 True \ --output_dir output_qwen \ --num_train_epochs 16 \ --per_device_train_batch_size 2 \ @@ -59,7 +58,9 @@ python finetune.py \ --model_max_length 3072 \ --lazy_preprocess True \ --gradient_checkpointing \ - --use_lora + --use_lora \ + --fp16 True \ + --deepspeed finetune/ds_config_zero2.json # If you use fp16 
instead of bf16, you should use deepspeed # --fp16 True --deepspeed finetune/ds_config_zero2.json diff --git a/finetune/finetune_qlora_single_gpu.sh b/finetune/finetune_qlora_single_gpu.sh old mode 100644 new mode 100755