From b81d4a8c7a57f9370bd7b6b4d1e0325092c100a2 Mon Sep 17 00:00:00 2001
From: Yang An
Date: Sat, 5 Aug 2023 23:58:38 +0800
Subject: [PATCH] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2d42a77..e68a037 100644
--- a/README.md
+++ b/README.md
@@ -186,7 +186,7 @@ print(f'Response: {response}')
 
 ## Quantization
 
-We provide examples to show how to load models in `NF4` and `Int8`. For starters, make sure you have implemented `bitsandbytes`. Note that the requirements for `bitsandbytes` is:
+We provide examples to show how to load models in `NF4` and `Int8`. For starters, make sure you have installed `bitsandbytes`. Note that the requirements for `bitsandbytes` are:
 
 ```
 **Requirements** Python >=3.8. Linux distribution (Ubuntu, MacOS, etc.) + CUDA > 10.0.
@@ -197,7 +197,7 @@ Windows users should find another option, which might be [bitsandbytes-windows-w
 Then you only need to add your quantization configuration to `AutoModelForCausalLM.from_pretrained`. See the example below:
 
 ```python
-from transformers import BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, BitsAndBytesConfig
 
 # quantization configuration for NF4 (4 bits)
 quantization_config = BitsAndBytesConfig(
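
For context, a minimal sketch of how the `BitsAndBytesConfig` import added by this patch is typically used to load a model in NF4. The hunk above ends at the opening of the config call, so the `bnb_4bit_*` arguments and the model id `Qwen/Qwen-7B-Chat` below are assumptions for illustration, not taken from the patched README:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Quantization configuration for NF4 (4 bits); these bnb_4bit_* values
# are illustrative assumptions, not copied from the patched README.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The model id is a placeholder; substitute the checkpoint the README targets.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-7B-Chat",
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True,
)
```

Passing the config through `quantization_config=` is why the patch widens the import to include `AutoModelForCausalLM`: both names are needed in the same snippet.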