From 4d520d5626eaf8daa8b997a8e665e4f8b97d9306 Mon Sep 17 00:00:00 2001
From: hyperzlib
Date: Thu, 25 Apr 2024 17:09:36 +0800
Subject: [PATCH] Update LoRA training script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                            |  4 +++-
 finetune/ds_config_zero2.json         |  8 +++++---
 finetune/ds_config_zero3.json         | 10 ++++++----
 finetune/finetune_lora_single_gpu.sh  |  5 +++--
 finetune/finetune_qlora_single_gpu.sh |  0
 5 files changed, 17 insertions(+), 10 deletions(-)
 mode change 100644 => 100755 finetune/finetune_lora_single_gpu.sh
 mode change 100644 => 100755 finetune/finetune_qlora_single_gpu.sh

diff --git a/.gitignore b/.gitignore
index 9a7fbea..c5321bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,6 @@ build
 
 /private/
 output_qwen/
-output_qwen*/
\ No newline at end of file
+output_qwen*/
+dataset/
+!dataset/.empty
\ No newline at end of file
diff --git a/finetune/ds_config_zero2.json b/finetune/ds_config_zero2.json
index 4be2c0b..420cefe 100644
--- a/finetune/ds_config_zero2.json
+++ b/finetune/ds_config_zero2.json
@@ -21,11 +21,13 @@
     },
 
     "scheduler": {
-        "type": "WarmupLR",
+        "type": "WarmupDecayLR",
         "params": {
-            "warmup_min_lr": "auto",
+            "warmup_min_lr": 0,
             "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
+            "warmup_num_steps": "auto",
+            "warmup_type": "linear",
+            "total_num_steps": "auto"
         }
     },
 
diff --git a/finetune/ds_config_zero3.json b/finetune/ds_config_zero3.json
index e30fe94..52c53fd 100644
--- a/finetune/ds_config_zero3.json
+++ b/finetune/ds_config_zero3.json
@@ -8,7 +8,7 @@
         "min_loss_scale": 1
     },
     "bf16": {
-        "enabled": "auto"
+        "enabled": false
     },
     "optimizer": {
         "type": "AdamW",
@@ -21,11 +21,13 @@
     },
 
     "scheduler": {
-        "type": "WarmupLR",
+        "type": "WarmupDecayLR",
         "params": {
-            "warmup_min_lr": "auto",
+            "warmup_min_lr": 0,
             "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
+            "warmup_num_steps": "auto",
+            "warmup_type": "linear",
+            "total_num_steps": "auto"
         }
     },
 
diff --git a/finetune/finetune_lora_single_gpu.sh b/finetune/finetune_lora_single_gpu.sh
old mode 100644
new mode 100755
index 7c99e4b..7fec27a
--- a/finetune/finetune_lora_single_gpu.sh
+++ b/finetune/finetune_lora_single_gpu.sh
@@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0
 python finetune.py \
   --model_name_or_path $MODEL \
   --data_path $DATA \
-  --bf16 True \
   --output_dir output_qwen \
   --num_train_epochs 16 \
   --per_device_train_batch_size 2 \
@@ -59,7 +58,9 @@ python finetune.py \
   --model_max_length 3072 \
   --lazy_preprocess True \
   --gradient_checkpointing \
-  --use_lora
+  --use_lora \
+  --fp16 True \
+  --deepspeed finetune/ds_config_zero2.json
 
 # If you use fp16 instead of bf16, you should use deepspeed
 # --fp16 True --deepspeed finetune/ds_config_zero2.json
diff --git a/finetune/finetune_qlora_single_gpu.sh b/finetune/finetune_qlora_single_gpu.sh
old mode 100644
new mode 100755
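
Applying the patch: a minimal sketch, assuming the mail above is saved as
0001-update-lora-training-script.patch (a hypothetical name; use whatever
your mail client or git format-patch produced).

    # Preview the diffstat and verify the patch applies cleanly first.
    git apply --stat 0001-update-lora-training-script.patch
    git apply --check 0001-update-lora-training-script.patch

    # Apply it as a commit; git am keeps the From/Date/Subject headers
    # above as the commit author, date, and message.
    git am 0001-update-lora-training-script.patch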
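
The scheduler change is the substantive part: DeepSpeed's WarmupLR only warms
the learning rate up and then holds it, while WarmupDecayLR decays it linearly
to zero afterwards, which is why the params block now needs total_num_steps
(the "auto" values are filled in by the HuggingFace Trainer at launch). A quick
post-apply sanity check, assuming MODEL and DATA are already set inside the
script; note that the new --deepspeed path is relative to the repo root, so the
script is meant to be launched from there.

    # Confirm both edited DeepSpeed configs still parse as valid JSON.
    python -m json.tool finetune/ds_config_zero2.json > /dev/null && echo "zero2 OK"
    python -m json.tool finetune/ds_config_zero3.json > /dev/null && echo "zero3 OK"

    # Launch the single-GPU LoRA run; the script now passes --fp16 and
    # --deepspeed finetune/ds_config_zero2.json on its own.
    bash finetune/finetune_lora_single_gpu.sh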