From 99b13b4fd234ee52c197ce00db705b39a7242ce7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B4=BA=E5=BC=98?=
Date: Wed, 31 Jan 2024 10:14:37 +0800
Subject: [PATCH] add trt docker file && add warning for ascend

---
 recipes/finetune/ascend/README.md            |  3 ++-
 recipes/inference/tensorrt/docker/Dockerfile | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 recipes/inference/tensorrt/docker/Dockerfile

diff --git a/recipes/finetune/ascend/README.md b/recipes/finetune/ascend/README.md
index a3e43b0..0dd3ae3 100644
--- a/recipes/finetune/ascend/README.md
+++ b/recipes/finetune/ascend/README.md
@@ -1,5 +1,5 @@
 # Fine-tuning Qwen by Ascend NPU
-Below, we provide a simple example to show how to finetune Qwen by Ascend NPU. You can also refer to the official [mindformers](https://gitee.com/mindspore/mindformers/blob/dev/research/qwen/qwen.md) for detailed usage.
+Below, we provide a simple example to show how to finetune Qwen by Ascend NPU. Currently, fine-tuning and inference are supported for Qwen 7B and 14B models. You can also refer to the official [mindformers](https://gitee.com/mindspore/mindformers/blob/dev/research/qwen/qwen.md) for detailed usage.
 
 ## Environment Requirement
 
@@ -137,6 +137,7 @@ python research/qwen/run_qwen.py \
 --predict_data '比较适合深度学习入门的书籍有' \
 --run_mode predict \
 --load_checkpoint output/merged_model/rank_0/checkpoint_0.ckpt \
+--vocab_file Qwen/Qwen-7B-Chat/qwen.tiktoken \
 --auto_trans_ckpt False \
 --device_id 0
 ```
\ No newline at end of file
diff --git a/recipes/inference/tensorrt/docker/Dockerfile b/recipes/inference/tensorrt/docker/Dockerfile
new file mode 100644
index 0000000..47da0cc
--- /dev/null
+++ b/recipes/inference/tensorrt/docker/Dockerfile
@@ -0,0 +1,14 @@
+FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
+
+RUN apt-get update && \
+    apt-get -y install python3.10 python3-pip openmpi-bin libopenmpi-dev git && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install tensorrt_llm==0.8.0.dev2024011601 -U --no-cache-dir --pre --extra-index-url https://pypi.nvidia.com
+
+RUN pip install --no-cache-dir modelscope==1.11.1
+
+RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
+    cd TensorRT-LLM && \
+    git checkout c89653021e66ca78c55f02b366f404455bc12e8d && \
+    pip install --no-cache-dir -r examples/qwen/requirements.txt
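
For reference, a minimal sketch of how the Dockerfile added by this patch might be built and entered; the image tag `qwen-trtllm` and the `--gpus all` flag (which requires the NVIDIA Container Toolkit on the host) are assumptions, not part of the patch:

```bash
# Build the TensorRT-LLM image from the directory added by this patch
# (the tag "qwen-trtllm" is an assumed name, not defined anywhere in the patch).
docker build -t qwen-trtllm recipes/inference/tensorrt/docker

# Start an interactive container with GPU access; --gpus all assumes the
# NVIDIA Container Toolkit is installed on the host.
docker run --rm -it --gpus all qwen-trtllm bash
```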