# Hugging Face transformers and PyTorch required
cd ./models && python3 down_models.py --model-name deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
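The internals of `down_models.py` are not shown here; below is a minimal sketch of what such a script might look like, assuming it simply wraps `huggingface_hub.snapshot_download`. The `--model-name` flag comes from the command above; the local directory layout is an assumption.

```python
# down_models.py -- hypothetical sketch: pull a model snapshot from the Hugging Face Hub
import argparse
from huggingface_hub import snapshot_download  # pip install huggingface_hub

def main() -> None:
    parser = argparse.ArgumentParser(description="Download a model from the Hugging Face Hub")
    parser.add_argument("--model-name", required=True,
                        help="Repo id, e.g. deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
    args = parser.parse_args()

    # Assumed layout: store the weights under ./<model short name> inside ./models
    local_dir = args.model_name.split("/")[-1]
    snapshot_download(repo_id=args.model_name, local_dir=local_dir)
    print(f"Downloaded {args.model_name} to ./{local_dir}")

if __name__ == "__main__":
    main()
```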
Adjust the values below to match your environment:
# .env
export MODEL_NAME=Qwen2.5-0.5B-Instruct
export SERVED_MODEL_NAME=ChatBot
export GPU_MEMORY_UTILIZATION=0.8
export WEBUI_PORT=3000
bash launch.sh
Add the VLLM_USE_V1 and VLLM_ATTENTION_BACKEND environment variables and the `--dtype bfloat16` option as shown below:
services:
  vllm:
    ...
    entrypoint: python3 -m vllm.entrypoints.openai.api_server
      ...
      --dtype bfloat16 # 👈
      ...
    environment:
      - VLLM_USE_V1=1 # 👈
      - VLLM_ATTENTION_BACKEND=FLASH_ATTN # 👈
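Once the container is up, you can sanity-check the server through its OpenAI-compatible API. The sketch below assumes the vLLM API server is reachable on `localhost:8000` (adjust to whatever port your compose file maps); the model name must match `SERVED_MODEL_NAME` from `.env`.

```python
# Quick smoke test against the OpenAI-compatible endpoint exposed by vLLM.
# Assumes the API server is reachable on localhost:8000 -- change to your port mapping.
from openai import OpenAI  # pip install openai

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # vLLM does not require a real key by default

response = client.chat.completions.create(
    model="ChatBot",  # must match SERVED_MODEL_NAME in .env
    messages=[{"role": "user", "content": "Hello, who are you?"}],
    max_tokens=64,
)
print(response.choices[0].message.content)
```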