Skip to content

Commit 25af3a5

Browse files
committed
Rationalization of the BLAS batch size (BBS) steps after benchmarks
The step values that proved useful in benchmarks have been added; the ones that proved useless have been removed.
1 parent 570e289 commit 25af3a5

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

koboldcpp.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2650,10 +2650,10 @@ def hide_tooltip(event):
26502650

26512651
tabcontent = {}
26522652
# slider data
2653-
blasbatchsize_values = ["-1", "1", "2", "4", "8", "16", "32", "40", "48", "64", "80", "96", "128", "160", "192", "256", "320", "384", "512", "640", "768", "1024", "1280", "1536", "2048", "2560", "3072", "4096"]
2654-
blasbatchsize_text = ["Don't Batch BLAS","1","2","4","8","16","32","40","48","64","80","96","128","160","192","256","320","384","512","640","768","1024","1280","1536","2048","2560","3072","4096"]
2655-
blasubatchsize_values = ["0", "1", "2", "4", "8", "16", "32", "40", "48", "64", "80", "96", "128", "160", "192", "256", "320", "384", "512", "640", "768", "1024", "1280", "1536", "2048", "2560", "3072", "4096"]
2656-
blasubatchsize_text = ["0 - Physical Blas Batch same as Logical","1","2","4","8","16","32","40","48","64","80","96","128","160","192","256","320","384","512","640","768","1024","1280","1536","2048","2560","3072","4096"]
2653+
blasbatchsize_values = ["-1", "1", "2", "4", "8", "16", "20", "24", "28", "32", "40", "48", "56", "64", "80", "96", "112", "128", "160", "192", "224", "256", "384", "512", "768", "1024", "1536", "2048", "3072", "4096"]
2654+
blasbatchsize_text = ["Don't Batch BLAS","1","2","4","8","16","20","24","28","32","40","48","56","64","80","96","112","128","160","192","224","256","384","512","768","1024","1536","2048","3072","4096"]
2655+
blasubatchsize_values = ["0", "1", "2", "4", "8", "16", "20", "24", "28", "32", "40", "48", "56", "64", "80", "96", "112", "128", "160", "192", "224", "256", "384", "512", "768", "1024", "1536", "2048", "3072", "4096"]
2656+
blasubatchsize_text = ["0 - Physical Blas Batch same as Logical","1","2","4","8","16","20","24","28","32","40","48","56","64","80","96","112","128","160","192","224","256","384","512","768","1024","1536","2048","3072","4096"]
26572657
# Context-size choices (slider data), stored as decimal strings from "128" to "1048576".
# Each (first, last, step) row below is one inclusive arithmetic run; the step doubles
# at every threshold, which yields 720 entries in ascending order.
# The list is generated instead of hand-typed: the previous literal contained five
# off-grid typos (101476, 174088, 175112, 266140, 282524) where the progression
# requires 101376, 174080, 175104, 266240 and 282624. Entry count and index positions
# are unchanged, so any index-based default elsewhere still selects the same slot.
contextsize_text = [
    str(ctx_size)
    for first, last, step in (
        (128, 49152, 128),
        (49408, 65536, 256),
        (66048, 98304, 512),
        (99328, 196608, 1024),
        (198656, 262144, 2048),
        (266240, 393216, 4096),
        (401408, 524288, 8192),
        (540672, 1048576, 16384),
    )
    for ctx_size in range(first, last + 1, step)  # last + 1 makes the run inclusive
]
26582658
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
26592659
quantkv_values = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22"]
@@ -3111,8 +3111,8 @@ def changerunmode(a,b,c):
31113111
makelabelentry(quick_tab, "BLAS threads:" , blas_threads_var, 14, 50,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
31123112

31133113
# blas batch size
3114-
makeslider(quick_tab, "BLAS Logical Batch Size - optimum of 128 if not filled :", blasbatchsize_text, blasbatchsize_var, 0, 27, 16, width=391, set=12,tooltip="How many tokens to process at once per batch.\nLarger values use more memory unless Physical Batch supersedes it.")
3115-
makeslider(quick_tab, "BLAS Physical Batch Size - same as Logical if not filled :", blasubatchsize_text, blasubatchsize_var, 0, 27, 18, width=391, set=0,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
3114+
makeslider(quick_tab, "BLAS Logical Batch Size - optimum of 128 if not filled :", blasbatchsize_text, blasbatchsize_var, 0, 29, 16, width=391, set=17,tooltip="How many tokens to process at once per batch.\nLarger values use more memory unless Physical Batch supersedes it.")
3115+
makeslider(quick_tab, "BLAS Physical Batch Size - same as Logical if not filled :", blasubatchsize_text, blasubatchsize_var, 0, 29, 18, width=391, set=0,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
31163116

31173117
# load model
31183118
# Model picker row on the quick tab. The dialog title now names both formats
# ("GGUF or GGML") so it agrees with the tooltip; it previously read "GGML or GGML".
makefileentry(quick_tab, "Model:", "Select GGUF or GGML Model File", model_var, 40, 576, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
@@ -3168,8 +3168,8 @@ def changerunmode(a,b,c):
31683168
# blas thread specifier
31693169
makelabelentry(hardware_tab, "BLAS threads:" , blas_threads_var, 14, 50,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
31703170
# blas batch size
3171-
makeslider(hardware_tab, "BLAS Logical Batch Size - optimum of 128 if not filled :", blasbatchsize_text, blasbatchsize_var, 0, 27, 16, width=391, set=12 ,tooltip="How many tokens to process at once per batch.\nLarger values use more memory unless Physical Batch supersedes it.")
3172-
makeslider(hardware_tab, "BLAS Physical Batch Size - same as Logical if not filled :", blasubatchsize_text, blasubatchsize_var, 0, 27, 18, width=391, set=0,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
3171+
makeslider(hardware_tab, "BLAS Logical Batch Size - optimum of 128 if not filled :", blasbatchsize_text, blasbatchsize_var, 0, 29, 16, width=391, set=17 ,tooltip="How many tokens to process at once per batch.\nLarger values use more memory unless Physical Batch supersedes it.")
3172+
makeslider(hardware_tab, "BLAS Physical Batch Size - same as Logical if not filled :", blasubatchsize_text, blasubatchsize_var, 0, 29, 18, width=391, set=0,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
31733173
blasbatchsize_var.trace("w", changed_gpulayers_estimate)
31743174

31753175
# force version
@@ -5013,8 +5013,8 @@ def range_checker(arg: str):
50135013
#more advanced params
50145014
advparser = parser.add_argument_group('Advanced Commands')
50155015
advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For NTK Rope, a rule of thumb is to double the base frequency to go 50% beyond the base context, and to triple the base frequency to double the context. Beyond, the NTK calculations are more complex and you might need to use frequency scale as well. To use only linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
5016-
advparser.add_argument("--blasbatchsize", help="Sets the Logical batch size used in BLAS processing (default 128 for VRAM savings, optimal speed is 512, 256 is a great compromise). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,1,2,4,8,16,32,40,48,64,80,96,128,160,192,256,320,384,512,640,768,1024,1280,1536,2048,2560,3072,4096], default=128)
5017-
advparser.add_argument("--blasubatchsize", help="Sets the Physical batch size used in BLAS processing (default 128 for VRAM savings, optimal speed is 512, 256 is a great compromise). Setting it to 0 alignes Physical BLAS batch on logical BLAS.", type=int,choices=[0,1,2,4,8,16,32,40,48,64,80,96,128,160,192,256,320,384,512,640,768,1024,1280,1536,2048,2560,3072,4096], default=0)
5016+
advparser.add_argument("--blasbatchsize", help="Sets the Logical batch size used in BLAS processing (default 128 for VRAM savings, optimal speed is 512, 256 is a great compromise). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload. Max 4096.", type=check_range(int,-1,4096), default=128)
5017+
# Physical BLAS batch size: any integer accepted by check_range(int,0,4096).
# The help text now states the real default (0) and fixes the "alignes" typo;
# the flag name, type and default are unchanged.
advparser.add_argument("--blasubatchsize", help="Sets the Physical batch size used in BLAS processing (128 for VRAM savings, optimal speed is 512, 256 is a great compromise). Setting it to 0 (default) aligns the Physical BLAS batch on the Logical BLAS batch. Max 4096.", type=check_range(int,0,4096), default=0)
50185018
advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
50195019
advparser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
50205020
advparser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context without reprocessing everything once the max context is reached. If you disable it (or need to use Quantized KV cache (KVQ) with FlashAttention, aka. modes 1 to 20, which are incompatible with Context Shift), you can eventually use --smartcontext instead.", action='store_true')

0 commit comments

Comments
 (0)