We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3a71c54 commit d522f1e
vllm/config.py
@@ -989,14 +989,6 @@ def _verify_cuda_graph(self) -> None:
989
"to eager mode.", self.hf_config.model_type)
990
self.enforce_eager = True
991
992
- RECOMMENDED_MODEL_SUPPORTS_CUDA_GRAPH = ['phi3samba']
993
- if (self.hf_config.model_type in RECOMMENDED_MODEL_SUPPORTS_CUDA_GRAPH
994
- and not self.enforce_eager and self.max_seq_len_to_capture < self.max_model_len):
995
- logger.warning(
996
- "%s model performs best with the CUDA graph explicitly enabled. Set `--max-seq-len-to-capture <#>` "
997
- "when starting vLLM.", self.hf_config.model_type)
998
-
999
1000
def _verify_bnb_config(self) -> None:
1001
"""
1002
The current version of bitsandbytes (0.46.1) with 8-bit models does not
0 commit comments