From 4d520d5626eaf8daa8b997a8e665e4f8b97d9306 Mon Sep 17 00:00:00 2001
From: hyperzlib
Date: Thu, 25 Apr 2024 17:09:36 +0800
Subject: [PATCH] Update LoRA training script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                            |  4 +++-
 finetune/ds_config_zero2.json         |  8 +++++---
 finetune/ds_config_zero3.json         | 10 ++++++----
 finetune/finetune_lora_single_gpu.sh  |  5 +++--
 finetune/finetune_qlora_single_gpu.sh |  0
 5 files changed, 17 insertions(+), 10 deletions(-)
 mode change 100644 => 100755 finetune/finetune_lora_single_gpu.sh
 mode change 100644 => 100755 finetune/finetune_qlora_single_gpu.sh

diff --git a/.gitignore b/.gitignore
index 9a7fbea..c5321bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,6 @@ build
 
 /private/
 output_qwen/
-output_qwen*/
\ No newline at end of file
+output_qwen*/
+dataset/
+!dataset/.empty
\ No newline at end of file
diff --git a/finetune/ds_config_zero2.json b/finetune/ds_config_zero2.json
index 4be2c0b..420cefe 100644
--- a/finetune/ds_config_zero2.json
+++ b/finetune/ds_config_zero2.json
@@ -21,11 +21,13 @@
     },
 
     "scheduler": {
-        "type": "WarmupLR",
+        "type": "WarmupDecayLR",
         "params": {
-            "warmup_min_lr": "auto",
+            "warmup_min_lr": 0,
             "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
+            "warmup_num_steps": "auto",
+            "warmup_type": "linear",
+            "total_num_steps": "auto"
         }
     },
 
diff --git a/finetune/ds_config_zero3.json b/finetune/ds_config_zero3.json
index e30fe94..52c53fd 100644
--- a/finetune/ds_config_zero3.json
+++ b/finetune/ds_config_zero3.json
@@ -8,7 +8,7 @@
         "min_loss_scale": 1
     },
     "bf16": {
-        "enabled": "auto"
+        "enabled": false
     },
     "optimizer": {
         "type": "AdamW",
@@ -21,11 +21,13 @@
     },
 
     "scheduler": {
-        "type": "WarmupLR",
+        "type": "WarmupDecayLR",
         "params": {
-            "warmup_min_lr": "auto",
+            "warmup_min_lr": 0,
             "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
+            "warmup_num_steps": "auto",
+            "warmup_type": "linear",
+            "total_num_steps": "auto"
         }
     },
 
diff --git a/finetune/finetune_lora_single_gpu.sh b/finetune/finetune_lora_single_gpu.sh
old mode 100644
new mode 100755
index 7c99e4b..7fec27a
--- a/finetune/finetune_lora_single_gpu.sh
+++ b/finetune/finetune_lora_single_gpu.sh
@@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0
 python finetune.py \
   --model_name_or_path $MODEL \
   --data_path $DATA \
-  --bf16 True \
   --output_dir output_qwen \
   --num_train_epochs 16 \
   --per_device_train_batch_size 2 \
@@ -59,7 +58,9 @@ python finetune.py \
   --model_max_length 3072 \
   --lazy_preprocess True \
   --gradient_checkpointing \
-  --use_lora
+  --use_lora \
+  --fp16 True \
+  --deepspeed finetune/ds_config_zero2.json
 
 # If you use fp16 instead of bf16, you should use deepspeed
 # --fp16 True --deepspeed finetune/ds_config_zero2.json
diff --git a/finetune/finetune_qlora_single_gpu.sh b/finetune/finetune_qlora_single_gpu.sh
old mode 100644
new mode 100755
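
Applying the patch: a minimal sketch, assuming the mail above is saved as
0001-update-lora-training-script.patch (a hypothetical name; use whatever
your mail client or git format-patch produced).

    # Preview the diffstat and verify the patch applies cleanly first.
    git apply --stat 0001-update-lora-training-script.patch
    git apply --check 0001-update-lora-training-script.patch

    # Apply it as a commit; git am keeps the From/Date/Subject headers
    # above as the commit author, date, and message.
    git am 0001-update-lora-training-script.patch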
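
The scheduler change is the substantive part: DeepSpeed's WarmupLR only warms
the learning rate up and then holds it, while WarmupDecayLR decays it linearly
to zero afterwards, which is why the params block now needs total_num_steps
(the "auto" values are filled in by the HuggingFace Trainer at launch). A quick
post-apply sanity check, assuming MODEL and DATA are already set inside the
script; note that the new --deepspeed path is relative to the repo root, so the
script is meant to be launched from there.

    # Confirm both edited DeepSpeed configs still parse as valid JSON.
    python -m json.tool finetune/ds_config_zero2.json > /dev/null && echo "zero2 OK"
    python -m json.tool finetune/ds_config_zero3.json > /dev/null && echo "zero3 OK"

    # Launch the single-GPU LoRA run; the script now passes --fp16 and
    # --deepspeed finetune/ds_config_zero2.json on its own.
    bash finetune/finetune_lora_single_gpu.sh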