Commit f2204bf

Fix bench
1 parent 2af8d26 commit f2204bf
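
In brief: this commit adds free-VRAM figures (from MaxFreeMemory) alongside the existing total-VRAM figures, updates the benchmark coherence check to accept spaced "1 1" output instead of "1111", prints per-GPU device names, adds an FVRAM0 column to the benchmark CSV, and raises the --promptlimit default from 100 to 128.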

File tree

1 file changed (+28, -4)

koboldcpp.py

Lines changed: 28 additions & 4 deletions
@@ -4618,7 +4618,12 @@ def onready_subprocess():
         gpu1avram = int(MaxMemory[1]/1024/1024)
         gpu2avram = int(MaxMemory[2]/1024/1024)
         gpu3avram = int(MaxMemory[3]/1024/1024)
+        gpu0fvram = int(MaxFreeMemory[0]/1024/1024)
+        gpu1fvram = int(MaxFreeMemory[1]/1024/1024)
+        gpu2fvram = int(MaxFreeMemory[2]/1024/1024)
+        gpu3fvram = int(MaxFreeMemory[3]/1024/1024)
         gpuavram = gpu0avram + gpu1avram + gpu2avram + gpu3avram
+        gpufvram = gpu0fvram + gpu1fvram + gpu2fvram + gpu3fvram
         benchmaxctx = (maxctx - 128)
         benchtg = args.promptlimit
         benchpp = (benchmaxctx - benchtg)
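
The new fvram values mirror the existing avram math: a straight byte-to-MiB conversion. A minimal standalone sketch, assuming MaxMemory/MaxFreeMemory hold per-GPU byte counts (the numbers below are invented):

# Byte -> MiB conversion as used for both total and free memory.
MaxMemory = [8589934592, 0, 0, 0]      # hypothetical: 8 GiB total on GPU 0
MaxFreeMemory = [6442450944, 0, 0, 0]  # hypothetical: 6 GiB currently free

gpu0avram = int(MaxMemory[0]/1024/1024)      # 8192 MiB
gpu0fvram = int(MaxFreeMemory[0]/1024/1024)  # 6144 MiB
print(gpu0avram, gpu0fvram)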
@@ -4663,7 +4668,7 @@ def onready_subprocess():
         print(result)
         if args.benchmark:
             result = (result[:4] if len(result)>4 else "") if not args.prompt else result
-            resultok = (result=="1111")
+            resultok = ((result==" 1 1") or (result=="1 1 "))
             t_pp = float(handle.get_last_process_time())*float(benchpp)*0.001
             t_gen = float(handle.get_last_eval_time())*float(benchtg)*0.001
             s_pp = float(benchpp)/t_pp
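
The coherence check now accepts the spaced "1 1" form (with either a leading or trailing space) rather than the old "1111". Restated as a standalone helper; the function name and sample replies are mine, not the commit's:

# Keep only the first four characters of the reply, then compare against
# the two accepted tokenizations of a "1 1 1 1..." answer.
def bench_result_ok(result: str, prompt_mode: bool = False) -> bool:
    if not prompt_mode:
        result = result[:4] if len(result) > 4 else ""
    return result == " 1 1" or result == "1 1 "

print(bench_result_ok(" 1 1 1 1 1"))  # True: new accepted form
print(bench_result_ok("1111"))        # False: the form the old check expected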
@@ -4679,16 +4684,35 @@ def onready_subprocess():
             print(f"HighPriority: {args.highpriority}")
             print(f"FlashAttention: {args.flashattention}")
             print(f"Threads: {args.threads}")
+            CUDevicesNames.sort(reverse=True)
+            if gpu0avram>0:
+                print(f"GPU 0 Name: {CUDevicesNames[0]}")
             if gpu0avram>0:
                 print(f"GPU 0 VRAM: {gpu0avram} MiB")
+            if gpu0fvram>0:
+                print(f"GPU 0 VRAM: {gpu0fvram} MiB")
+            if gpu1avram>0:
+                print(f"GPU 1 Name: {CUDevicesNames[1]}")
             if gpu1avram>0:
                 print(f"GPU 1 VRAM: {gpu1avram} MiB")
+            if gpu1fvram>0:
+                print(f"GPU 1 VRAM: {gpu1fvram} MiB")
+            if gpu2avram>0:
+                print(f"GPU 2 Name: {CUDevicesNames[2]}")
             if gpu2avram>0:
                 print(f"GPU 2 VRAM: {gpu2avram} MiB")
+            if gpu2fvram>0:
+                print(f"GPU 2 VRAM: {gpu2fvram} MiB")
+            if gpu3avram>0:
+                print(f"GPU 3 Name: {CUDevicesNames[3]}")
             if gpu3avram>0:
                 print(f"GPU 3 VRAM: {gpu3avram} MiB")
+            if gpu3fvram>0:
+                print(f"GPU 3 VRAM: {gpu3fvram} MiB")
             if gpuavram > gpu0avram:
                 print(f"GPUs Total VRAM: {gpuavram} MiB")
+            if gpufvram > gpu0fvram:
+                print(f"GPUs Total VRAM: {gpufvram} MiB")
             print(f"Cublas_Args: {args.usecublas}")
             print(f"Layers: {args.gpulayers}")
             print(f"Tensor_Split: {args.tensor_split}")
@@ -4712,8 +4736,8 @@ def onready_subprocess():
                 with open(args.benchmark, "a") as file:
                     file.seek(0, 2)
                     if file.tell() == 0: #empty file
-                        file.write(f"Datime,KCPPF,LCPP,Backend,CudaSpecifics,Model,NoAvx2,NoBlas,NoMmap,HighP,FlashA,Thrd,VRAM,Layers,BlasThrd,BBSizeN,BBSizeU,KVC,PPNum,PPTime,PPSpeed,TGNum,TGTime,TGSpeed,BenchCtx,TotalTime,Coher,Tensor1,Split2,Cublas1,Argument2,Argument3,Argument4")
-                    file.write(f"\n{ReleaseDate},{KcppVersion},{LcppVersion},{libname},{CudaSpecifics},{benchmodel},{args.noavx2},{args.noblas},{args.nommap},{args.highpriority},{args.flashattention},{args.threads},{gpuavram},{args.gpulayers},{args.blasthreads},{args.blasbatchsize},{args.blasubatchsize},{args.quantkv},{benchpp},{t_pp:.3f},{s_pp:.2f},{benchtg},{t_gen:.3f},{s_gen:.2f},{benchmaxctx},{(t_pp+t_gen):.3f},{resultok},{args.tensor_split},,{args.usecublas},,,")
+                        file.write(f"Datime,KCPPF,LCPP,Backend,CudaSpecifics,Model,NoAvx2,NoBlas,NoMmap,HighP,FlashA,Thrd,VRAM,FVRAM0,Layers,BlasThrd,BBSizeN,BBSizeU,KVC,PPNum,PPTime,PPSpeed,TGNum,TGTime,TGSpeed,BenchCtx,TotalTime,Coher,Tensor1,Split2,Cublas1,Argument2,Argument3,Argument4")
+                    file.write(f"\n{ReleaseDate},{KcppVersion},{LcppVersion},{libname},{CudaSpecifics},{benchmodel},{args.noavx2},{args.noblas},{args.nommap},{args.highpriority},{args.flashattention},{args.threads},{gpuavram},{gpu0fvram},{args.gpulayers},{args.blasthreads},{args.blasbatchsize},{args.blasubatchsize},{args.quantkv},{benchpp},{t_pp:.3f},{s_pp:.2f},{benchtg},{t_gen:.3f},{s_gen:.2f},{benchmaxctx},{(t_pp+t_gen):.3f},{resultok},{args.tensor_split},,{args.usecublas},,,")
             except Exception as e:
                 print(f"Error writing benchmark to file: {e}")
             global using_gui_launcher
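
The CSV writer follows an append-with-header-on-empty-file pattern, which the commit extends with the new FVRAM0 column. A self-contained sketch of the same pattern (file name, columns, and values invented):

# Append a row; write the header first only when the file is empty.
row = {"Datime": "2024-01-01", "VRAM": 8192, "FVRAM0": 6144}
with open("bench.csv", "a") as file:
    file.seek(0, 2)                      # jump to end of file
    if file.tell() == 0:                 # empty file -> emit header once
        file.write(",".join(row.keys()))
    file.write("\n" + ",".join(str(v) for v in row.values()))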
@@ -4817,7 +4841,7 @@ def range_checker(arg: str):
     advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
     advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
     advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
-    advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100)
+    advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=128)
     advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
     advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
     advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
