
Commit 9d6a42b

lijingning authored and ZhangYulongg committed
Adapt to vLLM responses without arrival_time; adapt to vLLM requiring the model field; add a case number no to RequestFuncInput/RequestFuncOutput/SampleRequest
1 parent 1b712bb commit 9d6a42b

File tree

3 files changed: +215 -17 lines changed


benchmarks/backend_request_func.py

Lines changed: 37 additions & 12 deletions
@@ -36,6 +36,7 @@
 @dataclass
 class RequestFuncInput:
     """Input for requesting LLMs via API"""
+    no: int
     prompt: str
     history_QA: Optional[dict]
     hyper_parameters: dict
@@ -54,6 +55,7 @@ class RequestFuncInput:
 @dataclass
 class RequestFuncOutput:
     """Output for requesting LLMs via API"""
+    no: int = 0
     generated_text: str = ""
     reasoning_content: str = ""
     success: bool = False
@@ -84,7 +86,7 @@ async def async_request_eb_openai_chat_completions(
         if request_func_input.multi_modal_content:
             content.append(request_func_input.multi_modal_content)
         payload = {
-            "model": "default",
+            "model": request_func_input.model,
             "messages": request_func_input.history_QA,
             "stream": True,
             "stream_options": {
@@ -97,13 +99,17 @@ async def async_request_eb_openai_chat_completions(
 
         if request_func_input.ignore_eos:
             payload["ignore_eos"] = request_func_input.ignore_eos
+
+        print("payload:{}".format(json.dumps(payload, ensure_ascii=False)))
+
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
         }
 
         output = RequestFuncOutput()
         output.prompt_len = 0
+        output.no = request_func_input.no
 
         ttft = 0.0
         st = time.perf_counter()
@@ -132,7 +138,8 @@ async def async_request_eb_openai_chat_completions(
                             ttft = timestamp - st
                             output.ttft = ttft
                             # cached_tokens
-                            output.prompt_len = data["usage"]["prompt_tokens_details"]["cached_tokens"]
+                            output.prompt_len = data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
+
 
                         # Decoding phase
                         else:
@@ -141,12 +148,12 @@ async def async_request_eb_openai_chat_completions(
 
                         output.generated_text += content or ""
                         output.reasoning_content += reason_content or ""
-                        output.arrival_time.append(choices[0].get("arrival_time"))
-                    elif usage := data.get("usage"):
+                        output.arrival_time.append(choices[0].get("arrival_time", timestamp))
+                    elif usage := data.get("usage", {}):
                         output.output_tokens = usage.get(
-                            "completion_tokens")
+                            "completion_tokens", 0)
                         output.prompt_tokens = usage.get(
-                            "prompt_tokens")
+                            "prompt_tokens", 0)
 
                     most_recent_timestamp = timestamp
 
@@ -173,6 +180,7 @@ async def async_request_eb_openai_chat_completions(
             f.write(str(output) + "\n")
     if pbar:
         pbar.update(1)
+    print("#####final_output:", output)
     return output
 
 
@@ -189,7 +197,7 @@ async def async_request_eb_openai_completions(
     async with aiohttp.ClientSession(trust_env=True,
                                      timeout=AIOHTTP_TIMEOUT) as session:
         payload = {
-            "model": "default",
+            "model": request_func_input.model,
             "prompt": request_func_input.prompt,
             "stream": True,
             "stream_options": {
@@ -202,14 +210,20 @@ async def async_request_eb_openai_completions(
 
         if request_func_input.ignore_eos:
             payload["ignore_eos"] = request_func_input.ignore_eos
+
+        print("payload:", json.dumps(payload, ensure_ascii=False))
+
         headers = {
-            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
+            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+            "Content-Type": "application/json"
         }
 
         output = RequestFuncOutput()
         output.prompt_len = request_func_input.prompt_len
+        output.no = request_func_input.no
 
         generated_text = ""
+        ttft = 0.0
         st = time.perf_counter()
         most_recent_timestamp = st
         try:
@@ -226,6 +240,7 @@ async def async_request_eb_openai_completions(
                         "data: ")
                     if chunk != "[DONE]":
                         # print("####chunk:", chunk, chunk.usage)
+                        timestamp = time.perf_counter()
                         data = json.loads(chunk)
 
                         # NOTE: Some completion API might have a last
@@ -235,21 +250,22 @@ async def async_request_eb_openai_completions(
                             # Note that text could be empty here
                             # e.g. for special tokens
                             text = choices[0].get("text")
-                            timestamp = time.perf_counter()
+
                             # First token
                             if not first_chunk_received:
                                 first_chunk_received = True
-                                ttft = time.perf_counter() - st
+                                ttft = timestamp - st
                                 output.ttft = ttft
 
                             # Decoding phase
                             else:
                                 output.itl.append(timestamp -
                                                   most_recent_timestamp)
+
+                            generated_text += text or ""
 
                             most_recent_timestamp = timestamp
-                            output.arrival_time.append(choices[0].get("arrival_time"))
-                            generated_text += text or ""
+                            output.arrival_time.append(choices[0].get("arrival_time", timestamp))
                         elif usage := data.get("usage"):
                             output.prompt_tokens = usage.get(
                                 "prompt_tokens")
@@ -262,15 +278,24 @@ async def async_request_eb_openai_completions(
                     output.error = (
                         "Never received a valid chunk to calculate TTFT."
                         "This response will be marked as failed!")
+
                 output.generated_text = generated_text
                 output.latency = most_recent_timestamp - st
+
+                if output.generated_text == "":
+                    output.success = False
+                    output.error = "No generated text found!"
+                else:
+                    output.success = True
            else:
                output.error = response.reason or ""
                output.success = False
    except Exception:
        output.success = False
        exc_info = sys.exc_info()
        output.error = "".join(traceback.format_exception(*exc_info))
+
+    print("final_output:{}".format(output))
 
    if pbar:
        pbar.update(1)
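
The key compatibility change in this file is the fallback taken when the backend (for example vLLM) attaches no arrival_time to streamed chunks: the local receive timestamp is recorded instead, and missing usage fields default to 0. Below is a minimal, self-contained sketch of that parsing pattern; the parse_chunk helper and the StreamStats holder are hypothetical stand-ins for illustration, not code from the patched file.

# Minimal sketch (hypothetical helper, not part of the patch): parse one SSE data
# payload the way the patched benchmark does, falling back to the local receive
# timestamp when the backend (e.g. vLLM) sends no per-chunk "arrival_time".
import json
import time
from dataclasses import dataclass, field


@dataclass
class StreamStats:
    """Simplified stand-in for RequestFuncOutput (only the fields used here)."""
    generated_text: str = ""
    arrival_time: list = field(default_factory=list)
    output_tokens: int = 0
    prompt_tokens: int = 0


def parse_chunk(chunk: str, stats: StreamStats) -> None:
    timestamp = time.perf_counter()
    data = json.loads(chunk)
    if choices := data.get("choices"):
        stats.generated_text += choices[0].get("text") or ""
        # vLLM sends no "arrival_time"; use the local timestamp as the fallback.
        stats.arrival_time.append(choices[0].get("arrival_time", timestamp))
    elif usage := data.get("usage", {}):
        # Default missing usage fields to 0 instead of None.
        stats.output_tokens = usage.get("completion_tokens", 0)
        stats.prompt_tokens = usage.get("prompt_tokens", 0)


if __name__ == "__main__":
    stats = StreamStats()
    parse_chunk('{"choices": [{"text": "hello"}]}', stats)   # chunk without arrival_time
    parse_chunk('{"usage": {"completion_tokens": 1, "prompt_tokens": 3}}', stats)
    print(stats.generated_text, stats.arrival_time, stats.output_tokens)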

benchmarks/benchmark_dataset.py

Lines changed: 7 additions & 2 deletions
@@ -38,7 +38,7 @@ class SampleRequest:
     """
     Represents a single inference request for benchmarking.
     """
-
+    no: int
     prompt: Union[str, Any]
     history_QA: Union[str, Any]
     json_data: Optional[dict]
@@ -229,6 +229,7 @@ def sample(
         **kwargs,
     ) -> list:
         samples: list = []
+        cnt = 1
         for entry in self.data:
             if len(samples) >= num_requests:
                 break
@@ -246,16 +247,17 @@ def sample(
                 prompt, None)
             samples.append(
                 SampleRequest(
+                    no=cnt,
                     prompt=prompt,
                     prompt_len=self.prompt_len,
                     history_QA=[],
                     expected_output_len=new_output_len,
                 ))
+            cnt += 1
 
         self.maybe_oversample_requests(samples, num_requests)
         return samples
 
-
 class EBChatDataset(BenchmarkDataset):
     """
     Implements the ShareGPT dataset. Loads data from a JSON file and generates
@@ -284,6 +286,7 @@ def sample(
         **kwargs,
     ) -> list:
         samples: list = []
+        cnt = 1
         for entry in self.data:
             if len(samples) >= num_requests:
                 break
@@ -297,12 +300,14 @@ def sample(
                 prompt, None)
             samples.append(
                 SampleRequest(
+                    no=cnt,
                     json_data=json_data,
                     prompt=prompt,
                     prompt_len=0,
                     history_QA=history_QA,
                     expected_output_len=new_output_len,
                 ))
+            cnt += 1
 
         self.maybe_oversample_requests(samples, num_requests)
         return samples
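
The cnt counter above gives every sampled request a stable case number. Below is a self-contained sketch of how that number can tie a benchmark result back to its input; the simplified dataclasses and the run_benchmark helper are hypothetical illustrations, not the actual definitions in these files.

# Hypothetical illustration of the per-case numbering added by this commit. The
# dataclasses below are simplified stand-ins for SampleRequest / RequestFuncOutput
# (only the fields needed here), not the real definitions in benchmarks/.
from dataclasses import dataclass


@dataclass
class SampleRequest:
    no: int                 # case number assigned while sampling (cnt in the patch)
    prompt: str


@dataclass
class RequestFuncOutput:
    no: int = 0             # copied from the input so results can be joined per case
    generated_text: str = ""
    success: bool = False


def run_benchmark(samples: list) -> dict:
    """Key each result by its case number so inputs and outputs can be matched."""
    results = {}
    for s in samples:
        out = RequestFuncOutput(no=s.no)
        out.generated_text = f"(reply to case {s.no})"   # placeholder response
        out.success = True
        results[out.no] = out
    return results


if __name__ == "__main__":
    samples = [SampleRequest(no=i, prompt=f"question {i}") for i in range(1, 4)]
    for no, out in sorted(run_benchmark(samples).items()):
        print(no, out.success, out.generated_text)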
