vllm安装方法
·
vllm安装方法
1、安装vllm
- 参照文档
https://docs.vllm.ai/en/latest/getting_started/quickstart.html
步骤
1)安装uv
curl -LsSf https://astral.sh/uv/install.sh | sh
2)创建环境
conda create -n myenv python=3.12 -y
conda activate myenv
pip install --upgrade uv
pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu118
uv pip install vllm --torch-backend=cu118
# 注意事项:重点是要安装 torch==2.7.1;该版本要求操作系统为 Ubuntu 20.04 及以上,Ubuntu 18.04 上无法安装 torch==2.7.1
2、验证vllm是否安装成功
执行 vllm serve 启动命令后,如出现以下提示(Application startup complete),则说明vllm安装成功且服务已正常启动
(vllm) root@dev-00805f96-686a-4adb-bdb0-c9abdf981f55-lfvwr:~/userdata/vllm-main# CUDA_VISIBLE_DEVICES=0 vllm serve /home/apulis-dev/userdata/vllm-main/checkpoint/checkpoint-630/checkpoint-630 --limit-mm-per-prompt '{"image":1}' --gpu-memory-utilization 0.4 --api-key apulis123 --served-model-name OCR_MODEL --tensor-parallel-size 1 --max-num-seqs 64 --max-model-len 512 --dtype auto --disable-mm-preprocessor-cache --seed 42 --host 127.0.0.1 --port 8004
INFO: Started server process [69908]
INFO: Waiting for application startup.
INFO: Application startup complete
3、调用vllm
CUDA_VISIBLE_DEVICES=0 \
vllm serve /home/apulis-dev/userdata/vllm-main/checkpoint/checkpoint-630/checkpoint-630 \
--limit-mm-per-prompt '{"image":1}' \
--gpu-memory-utilization 0.4 \
--api-key apulis123 \
--served-model-name OCR_MODEL \
--tensor-parallel-size 1 \
--max-num-seqs 64 \
--max-model-len 512 \
--dtype auto \
--disable-mm-preprocessor-cache \
--seed 42 \
--host 127.0.0.1 --port 8004
# 由于服务只监听在服务器的 127.0.0.1:8004,如需从本地机器访问,请在本地机器上执行以下命令建立SSH端口转发:
ssh -L 8004:localhost:8004 -p 45112 apulis-dev@gpu02.apulis.com.cn
4、代码版本及环境
代码版本:v0.10.2.rc1
环境:
name: vllm
channels:
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- bzip2=1.0.8=h5eee18b_6
- ca-certificates=2025.8.3=hbd8a1cb_0
- expat=2.7.1=h6a678d5_0
- ld_impl_linux-64=2.44=h1423503_1
- libffi=3.4.6=h2dba641_1
- libgcc=15.1.0=h767d61c_4
- libgcc-ng=15.1.0=h69a702a_4
- libgomp=15.1.0=h767d61c_4
- liblzma=5.8.1=hb9d3cd8_2
- liblzma-devel=5.8.1=hb9d3cd8_2
- libstdcxx=15.1.0=h8f9b012_4
- libstdcxx-ng=15.1.0=h4852527_4
- libuuid=1.41.5=h5eee18b_0
- libxcb=1.17.0=h9b100fa_0
- ncurses=6.5=h7934f7d_0
- openssl=3.5.2=h26f9b46_0
- pip=25.2=pyh8b19718_0
- pthread-stubs=0.4=hb9d3cd8_1002
- python=3.12.11=h22baa00_0
- readline=8.3=hc2a1206_0
- sqlite=3.50.2=hb25bd0a_1
- tk=8.6.15=h54e0aa7_0
- tzdata=2025b=h04d1e81_0
- wheel=0.45.1=py312h06a4308_0
- xorg-libx11=1.8.12=h9b100fa_1
- xorg-libxau=1.0.12=h9b100fa_0
- xorg-libxdmcp=1.1.5=h9b100fa_0
- xorg-xorgproto=2024.1=h5eee18b_1
- xz=5.8.1=hbcc6ac9_2
- xz-gpl-tools=5.8.1=hbcc6ac9_2
- xz-tools=5.8.1=hb9d3cd8_2
- zlib=1.2.13=h5eee18b_1
- pip:
- aiohappyeyeballs==2.6.1
- aiohttp==3.12.15
- aiosignal==1.4.0
- airportsdata==20250811
- annotated-types==0.7.0
- anyio==4.10.0
- astor==0.8.1
- attrs==25.3.0
- blake3==1.0.5
- cachetools==6.2.0
- cbor2==5.7.0
- certifi==2025.8.3
- cffi==1.17.1
- charset-normalizer==3.4.3
- click==8.2.1
- cloudpickle==3.1.1
- compressed-tensors==0.10.1
- cupy-cuda12x==13.6.0
- depyf==0.18.0
- dill==0.4.0
- diskcache==5.6.3
- distro==1.9.0
- dnspython==2.7.0
- docopt==0.6.2
- einops==0.8.1
- email-validator==2.3.0
- fastapi==0.116.1
- fastapi-cli==0.0.8
- fastapi-cloud-cli==0.1.5
- fastrlock==0.8.3
- filelock==3.19.1
- frozenlist==1.7.0
- fsspec==2024.6.1
- gguf==0.17.1
- googleapis-common-protos==1.70.0
- grpcio==1.74.0
- h11==0.16.0
- hf-xet==1.1.8
- httpcore==1.0.9
- httptools==0.6.4
- httpx==0.28.1
- huggingface-hub==0.34.4
- idna==3.10
- importlib-metadata==8.7.0
- interegular==0.3.3
- jinja2==3.1.6
- jiter==0.10.0
- jsonschema==4.25.1
- jsonschema-specifications==2025.4.1
- lark==1.2.2
- llguidance==0.7.30
- llvmlite==0.44.0
- lm-format-enforcer==0.10.12
- markdown-it-py==4.0.0
- markupsafe==2.1.5
- mdurl==0.1.2
- mistral-common==1.8.4
- mpmath==1.3.0
- msgpack==1.1.1
- msgspec==0.19.0
- multidict==6.6.4
- nest-asyncio==1.6.0
- networkx==3.3
- ninja==1.13.0
- num2words==0.5.14
- numba==0.61.2
- numpy==2.1.2
- nvidia-cublas-cu11==11.11.3.6
- nvidia-cublas-cu12==12.6.4.1
- nvidia-cuda-cupti-cu11==11.8.87
- nvidia-cuda-cupti-cu12==12.6.80
- nvidia-cuda-nvrtc-cu11==11.8.89
- nvidia-cuda-nvrtc-cu12==12.6.77
- nvidia-cuda-runtime-cu11==11.8.89
- nvidia-cuda-runtime-cu12==12.6.77
- nvidia-cudnn-cu11==9.1.0.70
- nvidia-cudnn-cu12==9.5.1.17
- nvidia-cufft-cu11==10.9.0.58
- nvidia-cufft-cu12==11.3.0.4
- nvidia-cufile-cu12==1.11.1.6
- nvidia-curand-cu11==10.3.0.86
- nvidia-curand-cu12==10.3.7.77
- nvidia-cusolver-cu11==11.4.1.48
- nvidia-cusolver-cu12==11.7.1.2
- nvidia-cusparse-cu11==11.7.5.86
- nvidia-cusparse-cu12==12.5.4.2
- nvidia-cusparselt-cu12==0.6.3
- nvidia-nccl-cu11==2.21.5
- nvidia-nccl-cu12==2.26.2
- nvidia-nvjitlink-cu12==12.6.85
- nvidia-nvtx-cu11==11.8.86
- nvidia-nvtx-cu12==12.6.77
- openai==1.102.0
- openai-harmony==0.0.4
- opencv-python-headless==4.12.0.88
- opentelemetry-api==1.36.0
- opentelemetry-exporter-otlp==1.36.0
- opentelemetry-exporter-otlp-proto-common==1.36.0
- opentelemetry-exporter-otlp-proto-grpc==1.36.0
- opentelemetry-exporter-otlp-proto-http==1.36.0
- opentelemetry-proto==1.36.0
- opentelemetry-sdk==1.36.0
- opentelemetry-semantic-conventions==0.57b0
- opentelemetry-semantic-conventions-ai==0.4.13
- outlines==0.1.11
- outlines-core==0.1.26
- packaging==25.0
- partial-json-parser==0.2.1.1.post6
- pillow==11.0.0
- prometheus-client==0.22.1
- prometheus-fastapi-instrumentator==7.1.0
- propcache==0.3.2
- protobuf==6.32.0
- psutil==7.0.0
- py-cpuinfo==9.0.0
- pybase64==1.4.2
- pycountry==24.6.1
- pycparser==2.22
- pydantic==2.11.7
- pydantic-core==2.33.2
- pydantic-extra-types==2.10.5
- pygments==2.19.2
- python-dotenv==1.1.1
- python-json-logger==3.3.0
- python-multipart==0.0.20
- pyyaml==6.0.2
- pyzmq==27.0.2
- ray==2.49.0
- referencing==0.36.2
- regex==2025.7.34
- requests==2.32.5
- rich==14.1.0
- rich-toolkit==0.15.0
- rignore==0.6.4
- rpds-py==0.27.0
- safetensors==0.6.2
- scipy==1.16.1
- sentencepiece==0.2.1
- sentry-sdk==2.35.1
- setproctitle==1.3.6
- setuptools==79.0.1
- shellingham==1.5.4
- six==1.17.0
- sniffio==1.3.1
- soundfile==0.13.1
- soxr==0.5.0.post1
- starlette==0.47.3
- sympy==1.13.3
- tiktoken==0.11.0
- tokenizers==0.21.4
- torch==2.7.0
- torchaudio==2.7.0
- torchvision==0.22.0
- tqdm==
更多推荐
所有评论(0)