fix: fix the OpenAI frontend's missing-arguments error (#8121)

richardhuo-nv · web-flow · commit ff4460dce1a9 · 2025-04-02T16:05:59.000-07:00
diff --git a/src/python/tritonfrontend/_api/_kservegrpc.py b/src/python/tritonfrontend/_api/_kservegrpc.py
@@ -1,4 +1,4 @@
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -80,6 +80,7 @@ class Options:
             int, Grpc_compression_level
         ] = Grpc_compression_level.NONE
         infer_allocation_pool_size: int = Field(8, ge=0)
+        max_response_pool_size: int = Field(2_147_483_647, ge=0)
         forward_header_pattern: str = ""
         # DLIS-7215: Add restricted protocol support
         # restricted_protocols: str = ""
diff --git a/src/python/tritonfrontend/_api/_kservegrpc.pyi b/src/python/tritonfrontend/_api/_kservegrpc.pyi
@@ -1,4 +1,4 @@
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -57,6 +57,7 @@ class KServeGrpc:
         max_connection_age_grace_ms: int
         infer_compression_level: int | Grpc_compression_level
         infer_allocation_pool_size: int
+        max_response_pool_size: int
         forward_header_pattern: str
         def __post_init__(self) -> None: ...
     triton_frontend: Incomplete