# LangChain retrieval knowledge base Q&A based on Qwen-7B-Chat

This notebook introduces a question-answering application over a local knowledge base, built with Qwen-7B-Chat and LangChain. The goal is a knowledge base Q&A solution that works well across many scenarios, is compatible with open-source models, and can run offline. The pipeline is: loading files -> reading text -> segmenting text -> vectorizing text -> vectorizing the question -> matching the top k text vectors most similar to the question vector -> adding the matched text as context, together with the question, to the prompt -> submitting the prompt to the LLM (Large Language Model) to generate an answer.

## Preparation

Download Qwen-7B-Chat

First, we need to download the model. You can use the `snapshot_download` function that comes with modelscope to download the model to a specified directory.

In [None]:
!pip install modelscope

In [None]:
from modelscope.hub.snapshot_download import snapshot_download
# Fetch the "Qwen/Qwen-7B-Chat" model files, using /tmp/models as the cache directory.
# The model-loading cell later in this notebook reads them from /tmp/models/Qwen/Qwen-7B-Chat.
snapshot_download("Qwen/Qwen-7B-Chat",cache_dir='/tmp/models') 

Download the dependencies for langchain and Qwen.

In [None]:
!pip install langchain==0.0.187 dashscope==1.0.4 sentencepiece==0.1.99 cpm_kernels==1.0.11 nltk==3.8.1 sentence_transformers==2.2.2 unstructured==0.6.5 faiss-cpu==1.7.4 icetk==0.0.7

Download the retrieval document.

In [None]:
!wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/qwen_recipes/LLM_Survey_Chinese.pdf.txt

Download the text2vec model, for Chinese in our case.

In [None]:
!wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/qwen_recipes/GanymedeNil_text2vec-large-chinese.tar.gz
!tar -zxvf GanymedeNil_text2vec-large-chinese.tar.gz -C /tmp

## Try out the model 

Load the Qwen-7B-Chat model.

In [None]:
from abc import ABC
from langchain.llms.base import LLM
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory holding the downloaded Qwen-7B-Chat files.
model_path="/tmp/models/Qwen/Qwen-7B-Chat"

# trust_remote_code=True lets transformers run the custom model/tokenizer code
# shipped alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
# Convert the weights to half precision and move the model to the CUDA device.
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).half().cuda()
# Put the model in evaluation mode.
model.eval()

class Qwen(LLM, ABC):
    """LangChain ``LLM`` adapter around the module-level Qwen chat model.

    Every call is forwarded to ``model.chat`` together with the module-level
    ``tokenizer`` and an empty history, so each prompt is answered
    independently of previous ones.
    """

    # These fields are reported through ``_identifying_params``; ``_call``
    # does not pass them on to ``model.chat``.
    max_token: int = 10000
    temperature: float = 0.01
    top_p: float = 0.9
    history_len: int = 3

    def __init__(self):
        super().__init__()

    @property
    def _llm_type(self) -> str:
        """Return the name identifying this LLM implementation."""
        return "Qwen"

    @property
    def _history_len(self) -> int:
        """Return the configured history length."""
        return self.history_len

    def set_history_len(self, history_len: int = 10) -> None:
        """Set the configured history length."""
        self.history_len = history_len

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> str:
        """Generate a reply to ``prompt``.

        Args:
            prompt: Text sent to the chat model.
            stop: Optional stop sequences. The reply is cut just before the
                earliest occurrence of any of them; empty strings are ignored.
            run_manager: Accepted for interface compatibility; unused.

        Returns:
            The model's reply, truncated at a stop sequence when one occurs.
        """
        response, _ = model.chat(tokenizer, prompt, history=[])
        if stop:
            # The chat call itself knows nothing about stop sequences, so they
            # are enforced on the finished text.
            cut_points = [
                position
                for position in (response.find(marker) for marker in stop if marker)
                if position != -1
            ]
            if cut_points:
                response = response[:min(cut_points)]
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "max_token": self.max_token,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "history_len": self.history_len,
        }
 

Specify the txt file to use as the retrieval source for knowledge-based Q&A.

In [None]:
import os
import torch
import argparse
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from typing import List, Tuple
import numpy as np
from langchain.document_loaders import TextLoader
from chinese_text_splitter import ChineseTextSplitter
from langchain.docstore.document import Document
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import RetrievalQA


def load_file(filepath, sentence_size=100):
    """Load a text file and split it into documents.

    The file is read with ``TextLoader`` (encoding autodetection enabled) and
    split with a ``ChineseTextSplitter`` built from ``sentence_size``. The
    resulting documents are also passed to ``write_check_file`` so the split
    can be inspected afterwards.

    Args:
        filepath: Path of the text file to load.
        sentence_size: Forwarded to ``ChineseTextSplitter``.

    Returns:
        The documents produced by ``load_and_split``.
    """
    text_loader = TextLoader(filepath, autodetect_encoding=True)
    splitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
    split_docs = text_loader.load_and_split(splitter)
    # Keep a record of the split result next to the source file.
    write_check_file(filepath, split_docs)
    return split_docs


def write_check_file(filepath, docs):
    """Append a dump of ``docs`` to ``tmp_files/load_file.txt``.

    The ``tmp_files`` folder is created next to ``filepath`` when missing.
    One header line ``filepath=<path>,len=<count>`` is written, followed by
    one line per document containing ``str(document)``.

    Args:
        filepath: Path of the source file the documents came from.
        docs: Sized iterable of documents to record.
    """
    folder_path = os.path.join(os.path.dirname(filepath), "tmp_files")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)
    fp = os.path.join(folder_path, 'load_file.txt')
    # Append mode: repeated calls accumulate in the same check file. The
    # ``with`` block closes the file, so no explicit close() is needed.
    with open(fp, 'a+', encoding='utf-8') as fout:
        fout.write(f"filepath={filepath},len={len(docs)}\n")
        fout.writelines(str(doc) + "\n" for doc in docs)

 
def seperate_list(ls: List[int]) -> List[List[int]]:
    """Split a list of integers into runs of consecutive values.

    A new run starts whenever an element is not exactly one greater than the
    element before it, e.g. ``[1, 2, 3, 5, 6, 8]`` becomes
    ``[[1, 2, 3], [5, 6], [8]]``.

    Args:
        ls: Integers to group, in the order they should be scanned.

    Returns:
        The runs in input order; an empty list when ``ls`` is empty.
    """
    if not ls:
        # Nothing to group; indexing ls[0] below would raise IndexError.
        return []
    runs = [[ls[0]]]
    for value in ls[1:]:
        if value == runs[-1][-1] + 1:
            runs[-1].append(value)
        else:
            runs.append([value])
    return runs


class FAISSWrapper(FAISS):
    """FAISS vector store whose hits can be widened with neighbouring chunks.

    With ``chunk_conent`` enabled, every hit is expanded with chunks stored
    next to it (same ``source`` metadata) until ``chunk_size`` characters
    would be exceeded, and adjacent selected chunks are merged into one
    passage. Returned documents are fresh ``Document`` objects, so the chunks
    kept in the docstore are never modified by a query.
    """

    # Character budget for one expanded passage (hit plus neighbours).
    chunk_size = 250
    # Whether hits are expanded and merged with neighbouring chunks.
    # (Attribute name kept as-is for compatibility.)
    chunk_conent = True
    # Hits whose score is above this value are skipped; 0 disables the filter.
    score_threshold = 0

    def _document_at(self, position: int) -> Document:
        """Return the stored document at index ``position``.

        Raises:
            ValueError: If the docstore lookup does not yield a ``Document``.
        """
        doc_id = self.index_to_docstore_id[position]
        doc = self.docstore.search(doc_id)
        if not isinstance(doc, Document):
            raise ValueError(f"Could not find document for id {doc_id}, got {doc}")
        return doc

    def similarity_search_with_score_by_vector(
        self, embedding: List[float], k: int = 4
    ) -> List[Tuple[Document, float]]:
        """Search the index and return ``(document, score)`` pairs.

        Args:
            embedding: Query vector.
            k: Number of nearest neighbours requested from the index.

        Returns:
            Without chunk expansion: one pair per accepted hit. With chunk
            expansion: one pair per run of adjacent selected chunks, whose
            text is the run's chunks joined by single spaces and whose score
            is the lowest score among the index hits inside the run. Each
            document also carries the integer-truncated score under
            ``metadata["score"]``.

        Raises:
            ValueError: If a needed chunk cannot be found in the docstore.
        """
        scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k)
        hit_scores = scores[0]
        hit_positions = [int(position) for position in indices[0]]
        store_len = len(self.index_to_docstore_id)

        # Score of every position the index returned (first occurrence wins),
        # built once instead of searching the hit array per element.
        score_by_hit = {}
        for rank, position in enumerate(hit_positions):
            if position != -1:
                score_by_hit.setdefault(position, hit_scores[rank])

        results = []
        # Selected position -> lowest score of the hit that selected it.
        selected = {}

        for rank, position in enumerate(hit_positions):
            score = hit_scores[rank]
            if position == -1 or 0 < self.score_threshold < score:
                # -1 happens when not enough docs are returned.
                continue
            hit_doc = self._document_at(position)

            if not self.chunk_conent:
                # Return a copy so the stored chunk is not annotated in place.
                scored = Document(
                    page_content=hit_doc.page_content,
                    metadata={**hit_doc.metadata, "score": int(score)},
                )
                results.append((scored, float(score)))
                continue

            selected[position] = min(score, selected.get(position, score))
            used_chars = len(hit_doc.page_content)
            source = hit_doc.metadata.get("source")
            # Walk outwards from the hit, alternating right and left.
            for offset in range(1, max(position, store_len - position)):
                budget_exceeded = False
                for neighbour in (position + offset, position - offset):
                    if not 0 <= neighbour < store_len:
                        continue
                    neighbour_doc = self._document_at(neighbour)
                    if used_chars + len(neighbour_doc.page_content) > self.chunk_size:
                        budget_exceeded = True
                        break
                    if neighbour_doc.metadata.get("source") == source:
                        used_chars += len(neighbour_doc.page_content)
                        selected[neighbour] = min(score, selected.get(neighbour, score))
                if budget_exceeded:
                    break

        if not self.chunk_conent:
            return results
        if not selected:
            # No usable hit at all; nothing to merge.
            return []

        for run in seperate_list(sorted(selected)):
            members = [self._document_at(position) for position in run]
            run_scores = [score_by_hit[position] for position in run if position in score_by_hit]
            if not run_scores:
                # The run holds only neighbours (a chunk from another source
                # separated them from their hit); use the score of the hit
                # that selected them.
                run_scores = [selected[position] for position in run]
            best_score = min(run_scores)
            merged = Document(
                page_content=" ".join(member.page_content for member in members),
                metadata={**members[0].metadata, "score": int(best_score)},
            )
            results.append((merged, float(best_score)))
        return results


if __name__ == '__main__':
    # Document to index
    filepath = 'LLM_Survey_Chinese.pdf.txt'
    # LLM name
    LLM_TYPE = 'qwen'
    # Embedding model name
    EMBEDDING_MODEL = 'text2vec'
    # Context-based prompt template; be sure to keep "{question}" and "{context_str}"
    PROMPT_TEMPLATE = """已知信息:
 {context_str} 
 根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
    # Embedding running device
    EMBEDDING_DEVICE = "cuda"
    # return top-k text chunk from vector store
    VECTOR_SEARCH_TOP_K = 3
    # Sentence length used when splitting the text
    SENTENCE_SIZE = 50
    CHAIN_TYPE = 'stuff'
    # Maps an LLM name to the class implementing it (the class is named
    # ``Qwen``; any other spelling raises NameError here).
    llm_model_dict = {
        "qwen": Qwen,
    }
    # Maps an embedding model name to its local directory.
    embedding_model_dict = {
        "text2vec": "/tmp/GanymedeNil_text2vec-large-chinese",
    }
    print("loading model start")
    llm = llm_model_dict[LLM_TYPE]()
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model_dict[EMBEDDING_MODEL],
        model_kwargs={'device': EMBEDDING_DEVICE},
    )
    print("loading model done")

    print("loading documents start")
    docs = load_file(filepath, sentence_size=SENTENCE_SIZE)
    print("loading documents done")

    print("embedding start")
    docsearch = FAISSWrapper.from_documents(docs, embeddings)
    print("embedding done")

    print("loading qa start")
    prompt = PromptTemplate(
        template=PROMPT_TEMPLATE, input_variables=["context_str", "question"]
    )

    # The retrieved text is injected into the template's "context_str" slot.
    chain_type_kwargs = {"prompt": prompt, "document_variable_name": "context_str"}
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type=CHAIN_TYPE,
        retriever=docsearch.as_retriever(search_kwargs={"k": VECTOR_SEARCH_TOP_K}),
        chain_type_kwargs=chain_type_kwargs,
    )
    print("loading qa done")

    query = "大模型指令微调有好的策略?"
    print(qa.run(query))