diff --git a/README.md b/README.md
index 99d24c0..d4f82c4 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
+

@@ -73,6 +74,7 @@ If your device supports fp16 or bf16, we recommend installing [flash-attention](
 ```bash
 git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention
 cd flash-attention && pip install .
+# The two packages below are optional. Installing them can be slow.
 pip install csrc/layer_norm
 pip install csrc/rotary
 ```
@@ -87,8 +89,7 @@ To use Qwen-7B-Chat for the inference, all you need to do is to input a few line
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
 
-# Note: For tokenizer usage, please refer to examples/tokenizer_showcase.ipynb.
-# The default behavior now has injection attack prevention off.
+# Note: The default behavior now has injection attack prevention off.
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
 
 # use bf16
@@ -147,7 +148,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B", device_map="auto",
 model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)
 
 inputs = tokenizer('蒙古国的首都是乌兰巴托(Ulaanbaatar)\n冰岛的首都是雷克雅未克(Reykjavik)\n埃塞俄比亚的首都是', return_tensors='pt')
-inputs = inputs.to('cuda:0')
+inputs = inputs.to(model.device)
 pred = model.generate(**inputs)
 print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
 # 蒙古国的首都是乌兰巴托(Ulaanbaatar)\n冰岛的首都是雷克雅未克(Reykjavik)\n埃塞俄比亚的首都是亚的斯亚贝巴(Addis Ababa)...
@@ -184,6 +185,10 @@ response, history = results['response'], results['history']
 print(f'Response: {response}')
 ```
 
+## Tokenizer
+
+Our tokenizer, based on tiktoken, differs from other tokenizers such as sentencepiece-based ones. Pay close attention to special tokens, especially when fine-tuning (a brief sketch follows this diff). For more details on the tokenizer and its use in fine-tuning, please refer to the [documentation](tokenization_note.md).
+
 ## Quantization
 
 We provide examples to show how to load models in `NF4` and `Int8`. For starters, make sure you have installed `bitsandbytes`. Note that the requirements for `bitsandbytes` are:
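To make the special-token caveat in the `## Tokenizer` section above concrete, here is a minimal sketch. It assumes only the generic `transformers` tokenizer API (`encode`, `decode`, `convert_tokens_to_ids`) and the `<|endoftext|>` token described in `tokenization_note.md`; the printed ids are illustrative, not guaranteed.

```python
from transformers import AutoTokenizer

# Minimal sketch: how the tiktoken-based tokenizer treats text vs. special tokens.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)

# Ordinary text is encoded as plain BPE ids.
ids = tokenizer.encode("Hello world")
print(ids, "->", tokenizer.decode(ids))

# With injection attack prevention off (the default noted in the diff above),
# a special-token string appearing in raw text may be encoded as its special
# id, so fine-tuning data that could contain such strings needs care.
eod_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")  # assumed special-token name
print("<|endoftext|> id:", eod_id)
```

When building fine-tuning data, it is generally safer to append special tokens by id rather than relying on string parsing of the raw text.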
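Likewise, a minimal sketch of the `NF4`/`Int8` loading mentioned in the `## Quantization` paragraph, assuming a `transformers` version that ships the `bitsandbytes` integration (`BitsAndBytesConfig`):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Minimal sketch: load the chat model with NF4 4-bit quantization.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-7B-Chat",
    quantization_config=nf4_config,
    device_map="auto",
    trust_remote_code=True,
).eval()

# For Int8, swap in: BitsAndBytesConfig(load_in_8bit=True)
```

NF4 keeps compute in bf16 here, matching the bf16 path shown earlier in the README.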