# Isekai-Qwen/docker/Dockerfile

# CUDA version used to build the default NVIDIA base-image tag below.
ARG CUDA_VERSION=11.7.1
# Base image reference; override with `--build-arg from=<image>` to build on another image.
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04

# Stage `base`: OS-level tooling (git, git-lfs, Python 3 + pip + headers, wget, vim).
FROM ${from} AS base

# An ARG declared before FROM is only visible to FROM lines; re-declare it for this stage.
ARG from

RUN <<EOF
# Heredoc lines are not implicitly chained with &&; abort on the first failure.
set -e
# Keep package configuration from prompting during the build. Exported only in
# this shell, so it does not leak into the runtime environment of the image.
export DEBIAN_FRONTEND=noninteractive
# apt-get is used instead of apt: apt's command-line interface is not meant for scripts.
apt-get update -y
apt-get upgrade -y
apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python3-dev \
    wget \
    vim
# Drop the package index in the same layer that created it.
rm -rf /var/lib/apt/lists/*
EOF

# Provide a `python` command pointing at python3 unless the base image already has one.
RUN command -v python >/dev/null 2>&1 || ln -sf /usr/bin/python3 /usr/bin/python

# Register the git-lfs filters in the git configuration.
RUN git lfs install

# Stage `dev`: creates the working directory and stages the Python requirement lists.
FROM base AS dev

# WORKDIR creates the directory if it does not exist, so no separate `mkdir -p` is needed.
# Users can also mount '/data/shared/Qwen/' to keep the data
WORKDIR /data/shared/Qwen/

# COPY sources cannot leave the build context: Docker strips leading `../`, so
# `../requirements.txt` already resolved to `requirements.txt` at the context root.
# The paths are written in that effective form.
COPY requirements.txt requirements_web_demo.txt ./

# Stage `bundle_req`: optionally installs PyTorch and the requirement lists.
FROM dev AS bundle_req

# Set to anything other than "true" to skip the installs in this stage.
ARG BUNDLE_REQUIREMENTS=true

RUN <<EOF
# Without this, only the exit status of the last command decides whether the
# step succeeds, so a failed torch/requirements install would go unnoticed.
set -e
if [ "$BUNDLE_REQUIREMENTS" = "true" ]; then
    cd /data/shared/Qwen
    # --no-cache-dir keeps pip's download cache out of the image layer.
    pip3 install --no-cache-dir torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2
    pip3 install --no-cache-dir -r requirements.txt
    pip3 install --no-cache-dir -r requirements_web_demo.txt
fi
EOF

# Stage `bundle_flash_attention`: optionally installs flash-attention v2.3.3
# and its layer_norm extension from a source checkout.
FROM bundle_req AS bundle_flash_attention
# Set to anything other than "true" to skip this stage's install.
ARG BUNDLE_FLASH_ATTENTION=true

RUN <<EOF
# Abort on the first failing command instead of relying on manual && chaining.
set -e
if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then
    cd /data/shared/Qwen
    # Reuse an existing checkout if one is already present; otherwise clone the tag.
    test -d flash-attention || git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention
    cd /data/shared/Qwen/flash-attention
    # --no-cache-dir keeps pip's cache out of the image layer.
    pip3 install --no-cache-dir .
    pip3 install --no-cache-dir csrc/layer_norm
fi
EOF

# Stage `bundle_finetune`: optionally installs the fine-tuning dependencies.
FROM bundle_flash_attention AS bundle_finetune
# Set to anything other than "true" to skip this stage's installs.
ARG BUNDLE_FINETUNE=true

RUN <<EOF
# Without this, a failed deepspeed/peft or apt install would be ignored as long
# as the final pip command succeeded.
set -e
if [ "$BUNDLE_FINETUNE" = "true" ]; then
    cd /data/shared/Qwen

    # Full-finetune / LoRA.
    pip3 install --no-cache-dir deepspeed peft

    # Q-LoRA.
    # apt-get is used instead of apt: apt's command-line interface is not meant for scripts.
    apt-get update -y
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libopenmpi-dev openmpi-bin
    # Drop the package index in the same layer that created it.
    rm -rf /var/lib/apt/lists/*
    pip3 install --no-cache-dir "optimum==1.12.0" "auto-gptq==0.4.2" mpi4py
fi
EOF

# Stage `bundle_openai_api`: optionally installs the packages for the OpenAI-style API server.
FROM bundle_finetune AS bundle_openai_api
# Set to anything other than "true" to skip this stage's install.
ARG BUNDLE_OPENAI_API=true

RUN <<EOF
# Abort on the first failing command (e.g. a failed cd) rather than continuing.
set -e
if [ "$BUNDLE_OPENAI_API" = "true" ]; then
    cd /data/shared/Qwen

    # --no-cache-dir keeps pip's cache out of the image layer.
    pip3 install --no-cache-dir fastapi uvicorn "openai<1.0.0" sse_starlette "pydantic<=1.10.13"
fi
EOF

# Stage `final`: adds the application sources and defines the default command (web demo).
FROM bundle_openai_api AS final
# An ARG declared before FROM is only visible to FROM lines; re-declare it for this stage.
ARG from

# Set the destination directory explicitly before copying instead of relying on
# the WORKDIR inherited from an earlier stage.
WORKDIR /data/shared/Qwen/

# COPY sources cannot leave the build context: Docker strips leading `../`, so
# the paths are written in their effective, context-root-relative form.
COPY requirements.txt requirements_web_demo.txt ./
COPY cli_demo.py web_demo.py openai_api.py finetune.py utils.py ./

# Copy each directory itself rather than `dir/*`: a wildcard match on a
# sub-directory copies only its contents (flattening the tree), and a wildcard
# with no matches fails the build.
COPY examples/ ./examples/
COPY eval/ ./eval/
COPY finetune/ ./finetune/

# Port the default command below listens on (documentation only; does not publish it).
EXPOSE 80

CMD ["python3", "web_demo.py", "--server-port", "80", "--server-name", "0.0.0.0", "-c", "/data/shared/Qwen/Qwen-Chat/"]
add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support 1 year ago			`ARG CUDA_VERSION=11.7.1`
			`ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04`

			`FROM ${from} as base`

			`ARG from`

			`RUN <<EOF`
			`apt update -y && apt upgrade -y && apt install -y --no-install-recommends \`
			`git \`
			`git-lfs \`
			`python3 \`
			`python3-pip \`
			`python3-dev \`
			`wget \`
			`vim \`
			`&& rm -rf /var/lib/apt/lists/*`
			`EOF`

			`RUN ln -s /usr/bin/python3 /usr/bin/python`

			`RUN git lfs install`

			`FROM base as dev`

			`WORKDIR /`

			`RUN mkdir -p /data/shared/Qwen`

			`WORKDIR /data/shared/Qwen/`

			`# Users can also mount '/data/shared/Qwen/' to keep the data`
			`COPY ../requirements.txt ./`
			`COPY ../requirements_web_demo.txt ./`

			`FROM dev as bundle_req`

			`ARG BUNDLE_REQUIREMENTS=true`

			`RUN <<EOF`
			`if [ "$BUNDLE_REQUIREMENTS" = "true" ]; then`
			`cd /data/shared/Qwen`
			`pip3 install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2`
			`pip3 install -r requirements.txt`
			`pip3 install -r requirements_web_demo.txt`
			`fi`
			`EOF`

			`FROM bundle_req as bundle_flash_attention`
			`ARG BUNDLE_FLASH_ATTENTION=true`

			`RUN <<EOF`
			`if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then`
			`cd /data/shared/Qwen`
			`test -d flash-attention \|\| git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention`
			`cd /data/shared/Qwen/flash-attention &&`
			`pip3 install . &&`
			`pip3 install csrc/layer_norm`
			`fi`
			`EOF`

			`FROM bundle_flash_attention as bundle_finetune`
			`ARG BUNDLE_FINETUNE=true`

			`RUN <<EOF`
			`if [ "$BUNDLE_FINETUNE" = "true" ]; then`
			`cd /data/shared/Qwen`

			`# Full-finetune / LoRA.`
			`pip3 install deepspeed peft`

			`# Q-LoRA.`
			`apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \`
			`libopenmpi-dev openmpi-bin \`
			`&& rm -rf /var/lib/apt/lists/*`
Fix Dockerfile for <https://github.com/QwenLM/Qwen/issues/783>. 1 year ago			`pip3 install "optimum==1.12.0" "auto-gptq==0.4.2" mpi4py`
add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support 1 year ago			`fi`
			`EOF`

			`FROM bundle_finetune as bundle_openai_api`
			`ARG BUNDLE_OPENAI_API=true`

			`RUN <<EOF`
			`if [ "$BUNDLE_OPENAI_API" = "true" ]; then`
			`cd /data/shared/Qwen`

			`pip3 install fastapi uvicorn "openai<1.0.0" sse_starlette "pydantic<=1.10.13"`
			`fi`
			`EOF`

			`FROM bundle_openai_api as final`
			`ARG from`

			`COPY ../requirements.txt ./`
			`COPY ../requirements_web_demo.txt ./`
			`COPY ../cli_demo.py ./`
			`COPY ../web_demo.py ./`
			`COPY ../openai_api.py ./`
			`COPY ../finetune.py ./`
			`COPY ../utils.py ./`
			`COPY ./examples/* ./examples/`
			`COPY ./eval/* ./eval/`
			`COPY ./finetune/* ./finetune/`

			`EXPOSE 80`

			`WORKDIR /data/shared/Qwen/`

			`CMD ["python3", "web_demo.py", "--server-port", "80", "--server-name", "0.0.0.0", "-c", "/data/shared/Qwen/Qwen-Chat/"]`