@@ -769,6 +769,8 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
769
769
Science Model Deployment endpoint. See:
770
770
https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm#model_dep_policies_auth__predict-endpoint
771
771
772
+ See https://docs.vllm.ai/en/latest/api/inference_params.html for the defaults of the parameters.
773
+
772
774
Example:
773
775
774
776
.. code-block:: python
@@ -786,7 +788,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
786
788
787
789
""" # noqa: E501
788
790
789
- frequency_penalty : float = 0.0
791
+ frequency_penalty : Optional [ float ] = None
790
792
"""Penalizes repeated tokens according to frequency. Between 0 and 1."""
791
793
792
794
logit_bias : Optional [Dict [str , float ]] = None
@@ -798,7 +800,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
798
800
n : int = 1
799
801
"""Number of output sequences to return for the given prompt."""
800
802
801
- presence_penalty : float = 0.0
803
+ presence_penalty : Optional [ float ] = None
802
804
"""Penalizes repeated tokens. Between 0 and 1."""
803
805
804
806
temperature : float = 0.2
@@ -812,7 +814,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
812
814
(the one with the highest log probability per token).
813
815
"""
814
816
815
- use_beam_search : Optional [bool ] = False
817
+ use_beam_search : Optional [bool ] = None
816
818
"""Whether to use beam search instead of sampling."""
817
819
818
820
top_k : Optional [int ] = - 1
@@ -822,15 +824,15 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
822
824
"""Float that represents the minimum probability for a token to be considered.
823
825
Must be in [0,1]. 0 to disable this."""
824
826
825
- repetition_penalty : Optional [float ] = 1.0
827
+ repetition_penalty : Optional [float ] = None
826
828
"""Float that penalizes new tokens based on their frequency in the
827
829
generated text. Values > 1 encourage the model to use new tokens."""
828
830
829
- length_penalty : Optional [float ] = 1.0
831
+ length_penalty : Optional [float ] = None
830
832
"""Float that penalizes sequences based on their length. Used only
831
833
when `use_beam_search` is True."""
832
834
833
- early_stopping : Optional [bool ] = False
835
+ early_stopping : Optional [bool ] = None
834
836
"""Controls the stopping condition for beam search. It accepts the
835
837
following values: `True`, where the generation stops as soon as there
836
838
are `best_of` complete candidates; `False`, where a heuristic is applied
@@ -842,7 +844,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
842
844
"""Whether to ignore the EOS token and continue generating tokens after
843
845
the EOS token is generated."""
844
846
845
- min_tokens : Optional [int ] = 0
847
+ min_tokens : Optional [int ] = None
846
848
"""Minimum number of tokens to generate per output sequence before
847
849
EOS or stop_token_ids can be generated."""
848
850
@@ -851,12 +853,11 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
851
853
The returned output will contain the stop tokens unless the stop tokens
852
854
are special tokens."""
853
855
854
- skip_special_tokens : Optional [bool ] = True
856
+ skip_special_tokens : Optional [bool ] = None
855
857
"""Whether to skip special tokens in the output."""
856
858
857
- spaces_between_special_tokens : Optional [bool ] = True
858
- """Whether to add spaces between special tokens in the output.
859
- Defaults to True."""
859
+ spaces_between_special_tokens : Optional [bool ] = None
860
+ """Whether to add spaces between special tokens in the output."""
860
861
861
862
tool_choice : Optional [str ] = None
862
863
"""Whether to use tool calling.
0 commit comments