finetune添加system_prompt支持

main
落雨楓 9 months ago
parent 5352d86eaa
commit 851ccfcea7

@ -9,10 +9,13 @@ import argparse
import os import os
import platform import platform
import shutil import shutil
import json
import json5
from copy import deepcopy from copy import deepcopy
import torch import torch
from transformers import AutoModelForCausalLM, AutoTokenizer from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import AutoPeftModelForCausalLM
from transformers.generation import GenerationConfig from transformers.generation import GenerationConfig
from transformers.trainer_utils import set_seed from transformers.trainer_utils import set_seed
@ -40,8 +43,52 @@ Commands:
:reset-conf Reset generation config 重置生成配置 :reset-conf Reset generation config 重置生成配置
''' '''
TOOL_DESC = """{name_for_model}: 你可以调用该工具与 {name_for_human} API 进行交互。{name_for_human} API 有什么作用?{description_for_model} 参数列表:{parameters}"""
REACT_INSTRUCTION = """请尽可能回答下列问题。您可以访问以下 API
{tools_text}
使用以下格式回答问题
Question: 你需要回答的问题
Thought: 你的思考过程
Action: 要使用的操作必须是 [{tools_name_text}] 其中之一
Action Input: 操作的输入参数
Observation: 操作的结果
... (这些 Thought/Action/Action Input/Observation 可以是零次或重复多次)
Thought: 你的最终思考过程
Final Answer: 你的最终回答"""
def build_react_instruction(functions: list[dict]):
tools_text = []
tools_name_text = []
for func_info in functions:
name = func_info.get("name", "")
name_m = func_info.get("name_for_model", name)
name_h = func_info.get("name_for_human", name)
desc = func_info.get("description", "")
desc_m = func_info.get("description_for_model", desc)
tool = TOOL_DESC.format(
name_for_model=name_m,
name_for_human=name_h,
description_for_model=desc_m,
parameters=json.dumps(func_info["parameters"], ensure_ascii=False),
)
tools_text.append(tool)
tools_name_text.append(name_m)
tools_text = "\n\n".join(tools_text)
tools_name_text = ", ".join(tools_name_text)
instruction = REACT_INSTRUCTION.format(
tools_text=tools_text,
tools_name_text=tools_name_text,
)
return instruction
def _load_model_tokenizer(args): def _load_model_tokenizer(args):
model_path = args.model_path or args.checkpoint_path
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
args.checkpoint_path, trust_remote_code=True, resume_download=True, args.checkpoint_path, trust_remote_code=True, resume_download=True,
) )
@ -49,9 +96,9 @@ def _load_model_tokenizer(args):
if args.cpu_only: if args.cpu_only:
device_map = "cpu" device_map = "cpu"
else: else:
device_map = "auto" device_map = "cuda"
model = AutoModelForCausalLM.from_pretrained( model = AutoPeftModelForCausalLM.from_pretrained(
args.checkpoint_path, args.checkpoint_path,
device_map=device_map, device_map=device_map,
trust_remote_code=True, trust_remote_code=True,
@ -59,7 +106,7 @@ def _load_model_tokenizer(args):
).eval() ).eval()
config = GenerationConfig.from_pretrained( config = GenerationConfig.from_pretrained(
args.checkpoint_path, trust_remote_code=True, resume_download=True, model_path, trust_remote_code=True, resume_download=True,
) )
return model, tokenizer, config return model, tokenizer, config
@ -107,7 +154,13 @@ def main():
description='QWen-Chat command-line interactive chat demo.') description='QWen-Chat command-line interactive chat demo.')
parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH, parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH,
help="Checkpoint name or path, default to %(default)r") help="Checkpoint name or path, default to %(default)r")
parser.add_argument("-m", "--model-path", type=str, default=None,
help="Model name or path, default to None")
parser.add_argument("-s", "--seed", type=int, default=1234, help="Random seed") parser.add_argument("-s", "--seed", type=int, default=1234, help="Random seed")
parser.add_argument("-sf", "--system-prompt-file", type=str, default=None,
help="System prompt file, default to None")
parser.add_argument("-fd", "--function-definition", type=str, default=None,
help="Function definition file, should be json or json5, default to None")
parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only") parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only")
args = parser.parse_args() args = parser.parse_args()
@ -116,11 +169,24 @@ def main():
model, tokenizer, config = _load_model_tokenizer(args) model, tokenizer, config = _load_model_tokenizer(args)
orig_gen_config = deepcopy(model.generation_config) orig_gen_config = deepcopy(model.generation_config)
system_prompt = "You are a helpful assistant."
if args.system_prompt_file:
with open(args.system_prompt_file, 'r', encoding="utf-8") as f:
system_prompt = f.read()
function_prompt = None
if args.function_definition:
with open(args.function_definition, 'r', encoding="utf-8") as f:
functions = json5.load(f)
function_prompt = build_react_instruction(functions)
_clear_screen() _clear_screen()
print(_WELCOME_MSG) print(_WELCOME_MSG)
seed = args.seed seed = args.seed
is_first_msg = True
while True: while True:
query = _get_input() query = _get_input()
@ -195,7 +261,11 @@ def main():
# Run chat. # Run chat.
set_seed(seed) set_seed(seed)
try: try:
for response in model.chat_stream(tokenizer, query, history=history, generation_config=config): prompt = query
if function_prompt:
prompt = f"{function_prompt}\n\nQuestion: {query}"
for response in model.chat_stream(tokenizer, prompt, history=history, system=system_prompt, generation_config=config):
_clear_screen() _clear_screen()
print(f"\nUser: {query}") print(f"\nUser: {query}")
print(f"\nQwen-Chat: {response}") print(f"\nQwen-Chat: {response}")

@ -88,36 +88,37 @@ def main():
example_functions = [ example_functions = [
{ {
"name_for_human": "Google Search", "name_for_human": "在线搜索",
"name_for_model": "google_search", "name_for_model": "search",
"description_for_model": "Google Search is a general search engine that can be used to access the internet," "description_for_model": "使用此工具可以搜索群内成员的信息或者在互联网上搜索信息。Format the arguments as a JSON object.", # If you expect `Action Input` to be a JSON.
+ " query encyclopedia knowledge, and stay informed about current events."
+ " Format the arguments as a JSON object.", # If you expect `Action Input` to be a JSON.
"parameters": [ "parameters": [
{ {
"name": "search_query", "name": "keywords",
"description": "Search keywords or phrases", "description": "需要搜索的关键词。",
"required": True, # Set to False if it is an optional parameter. "required": True,
"schema": {"type": "string"}, "schema": {"type": "string"},
}, },
# You can add more parameters to this `parameters` list if you wish.
], ],
}, },
{ {
"name_for_human": "Code Interpreter", "name_for_human": "天气信息",
"name_for_model": "code_interpreter", "name_for_model": "get_weather",
"description_for_model": "Code interpreter that can execute Python code." "description_for_model": "查询某个地点的天气信息。Format the arguments as a JSON object.",
+ "Enclose the code within triple backticks (`)"
+ " at the beginning and end of the code.", # If you expect `Action Input` to be a Markdown code block.
"parameters": [ "parameters": [
{ {
"name": "code", "name": "position",
"description": "Code to be executed", "description": "需要查询天气的地点。",
"required": True, "required": True,
"schema": {"type": "string"}, "schema": {"type": "string"},
}, },
], ],
}, },
{
"name_for_human": "封禁用户",
"name_for_model": "ban_user",
"description_for_model": "在用户多次触犯道德规范时,使用此工具可以封禁用户。",
"parameters": [],
}
] ]
example_instruction = build_react_instruction(example_functions) example_instruction = build_react_instruction(example_functions)

@ -221,7 +221,9 @@ class LazySupervisedDataset(Dataset):
if i in self.cached_data_dict: if i in self.cached_data_dict:
return self.cached_data_dict[i] return self.cached_data_dict[i]
ret = preprocess([self.raw_data[i]["conversations"]], self.tokenizer, self.max_len) system_prompt: str = self.raw_data[i].get("system") or "You are a helpful assistant."
ret = preprocess([self.raw_data[i]["conversations"]], self.tokenizer, self.max_len, system_prompt)
ret = dict( ret = dict(
input_ids=ret["input_ids"][0], input_ids=ret["input_ids"][0],
labels=ret["labels"][0], labels=ret["labels"][0],

@ -4,7 +4,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
MODEL="Qwen/Qwen-7B" # Set the path if you do not want to load from huggingface directly MODEL="Qwen/Qwen-7B" # Set the path if you do not want to load from huggingface directly
# ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations. # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
# See the section for finetuning in README for more information. # See the section for finetuning in README for more information.
DATA="path_to_data" DATA="../dataset/kurita.json"
function usage() { function usage() {
echo ' echo '

Loading…
Cancel
Save