diff --git a/.gitignore b/.gitignore
index 03677b0..e18fde1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,6 @@ build
 
 /private/
 output_qwen/
-output_qwen*/
\ No newline at end of file
+output_qwen*/
+dataset/*
+!dataset/.empty
\ No newline at end of file
diff --git a/finetune/ds_config_zero2.json b/finetune/ds_config_zero2.json
index 4be2c0b..420cefe 100644
--- a/finetune/ds_config_zero2.json
+++ b/finetune/ds_config_zero2.json
@@ -21,11 +21,13 @@
     },
 
     "scheduler": {
-        "type": "WarmupLR",
+        "type": "WarmupDecayLR",
         "params": {
-            "warmup_min_lr": "auto",
+            "warmup_min_lr": 0,
             "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
+            "warmup_num_steps": "auto",
+            "warmup_type": "linear",
+            "total_num_steps": "auto"
         }
     },
 
diff --git a/finetune/ds_config_zero3.json b/finetune/ds_config_zero3.json
index e30fe94..52c53fd 100644
--- a/finetune/ds_config_zero3.json
+++ b/finetune/ds_config_zero3.json
@@ -8,7 +8,7 @@
         "min_loss_scale": 1
     },
     "bf16": {
-        "enabled": "auto"
+        "enabled": false
     },
     "optimizer": {
         "type": "AdamW",
@@ -21,11 +21,13 @@
     },
 
     "scheduler": {
-        "type": "WarmupLR",
+        "type": "WarmupDecayLR",
         "params": {
-            "warmup_min_lr": "auto",
+            "warmup_min_lr": 0,
             "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
+            "warmup_num_steps": "auto",
+            "warmup_type": "linear",
+            "total_num_steps": "auto"
         }
     },
 
diff --git a/finetune/finetune_lora_single_gpu.sh b/finetune/finetune_lora_single_gpu.sh
old mode 100644
new mode 100755
index 7c99e4b..7fec27a
--- a/finetune/finetune_lora_single_gpu.sh
+++ b/finetune/finetune_lora_single_gpu.sh
@@ -39,7 +39,6 @@ export CUDA_VISIBLE_DEVICES=0
 python finetune.py \
   --model_name_or_path $MODEL \
   --data_path $DATA \
-  --bf16 True \
   --output_dir output_qwen \
   --num_train_epochs 16 \
   --per_device_train_batch_size 2 \
@@ -59,7 +58,9 @@ python finetune.py \
   --model_max_length 3072 \
   --lazy_preprocess True \
   --gradient_checkpointing \
-  --use_lora
+  --use_lora \
+  --fp16 True \
+  --deepspeed finetune/ds_config_zero2.json
 
 # If you use fp16 instead of bf16, you should use deepspeed
 # --fp16 True --deepspeed finetune/ds_config_zero2.json
diff --git a/finetune/finetune_qlora_single_gpu.sh b/finetune/finetune_qlora_single_gpu.sh
old mode 100644
new mode 100755