
Commit e100153

Merge remote-tracking branch 'origin/compilade/refactor-kv-cache' into GraniteFourWithJamba
* origin/compilade/refactor-kv-cache: (32 commits)
  convert : fix jamba conv1d shape squeezing
  llama : partially apply clang-format style
  llama : remove implicit recurrent state rollbacks
  llama : begin renaming llama_past back to llama_kv_cache
  llama : use unused n_embd_k_gqa in k_shift
  llama : fix mixed signedness comparison
  convert_hf : fix Jamba conversion
  llama : session saving and reloading for hybrid models
  mamba : fix non-contiguous usage of ggml_silu
  examples : replace llama_kv_cache_seq_* with llama_past_seq_*
  llama : rename llama_cache to llama_past
  llama : allow doing the equivalent of SSM_CONV with SUM_ROWS and MUL
  llama : fix .base() compilation error on Windows
  llama : use im2col and mul_mat to perform convolution for Mamba
  llama : avoid copies for simple batch splits
  llama : fix edge case finding batch seq_id of split recurrent cell
  llama : minimize swaps when reordering logits
  llama : fix batch split output count for embeddings
  llama : use equal-sequence-length sub-batches for recurrent models
  llama : sequence-length-aware batch splitting
  ...
2 parents d7f4d73 + 908e655 commit e100153

9 files changed: +581 additions, -278 deletions

convert_hf_to_gguf.py

Lines changed: 117 additions & 0 deletions
@@ -5057,6 +5057,123 @@ def modify_tensors(
         yield self.map_tensor_name(name), data_torch
 
 
+@ModelBase.register("JambaForCausalLM")
+class JambaModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.JAMBA
+
+    def get_vocab_base_pre(self, tokenizer) -> str:
+        del tokenizer  # unused
+
+        return "gpt-2"
+
+    def set_vocab(self):
+        if (self.dir_model / "tokenizer.model").is_file():
+            # Using Jamba's tokenizer.json causes errors on model load
+            # (something about "byte not found in vocab"),
+            # but there's a working tokenizer.model
+            self._set_vocab_sentencepiece()
+        else:
+            # Some Jamba models only have a tokenizer.json, which works.
+            self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        d_model = self.find_hparam(["hidden_size", "mamba_d_model"])
+        d_conv = self.find_hparam(["mamba_d_conv"], optional=True) or 4
+        d_inner = self.hparams["mamba_expand"] * d_model
+        d_state = self.find_hparam(["mamba_d_state"], optional=True) or 16
+        # ceiling division
+        # ref: https://stackoverflow.com/a/17511341/22827863
+        # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
+        dt_rank = self.find_hparam(["mamba_dt_rank"], optional=True) or -(d_model // -16)
+        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-6
+        n_kv_head = self.hparams["num_key_value_heads"]
+        attn_offset = self.hparams["attn_layer_offset"]
+        attn_period = self.hparams["attn_layer_period"]
+        n_kv_vec = [0 for _ in range(attn_offset)] + [
+            n_kv_head if (i - attn_offset) % attn_period == 0 else 0 for i in range(attn_offset, self.block_count)
+        ]
+
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_context_length(self.find_hparam(["max_position_embeddings", "n_ctx"]))
+        self.gguf_writer.add_embedding_length(d_model)
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(n_kv_vec)
+        self.gguf_writer.add_ssm_conv_kernel(d_conv)
+        self.gguf_writer.add_ssm_inner_size(d_inner)
+        self.gguf_writer.add_ssm_state_size(d_state)
+        self.gguf_writer.add_ssm_time_step_rank(dt_rank)
+        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
+        self.gguf_writer.add_expert_count(self.hparams["num_experts"])
+        self.gguf_writer.add_expert_used_count(self.hparams["num_experts_per_tok"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+
+        # Mini-Jamba
+        name = name.replace(".moe.", ".feed_forward.")
+        if bid is not None:
+            moe_offset = self.hparams["expert_layer_offset"]
+            moe_period = self.hparams["expert_layer_period"]
+
+            if not (bid >= moe_offset and (bid - moe_offset) % moe_period == 0):
+                name = name.replace(".experts.0.", ".")
+
+        # process the experts separately
+        if ".feed_forward.experts." in name:
+            n_experts = self.hparams["num_experts"]
+
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+
+                # merge the experts into a single 3d tensor
+                for wid in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.feed_forward.experts.{xid}.{wid}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    # using the same merged name as qwen2moe
+                    merged_name = f"model.layers.{bid}.mlp.experts.{wid}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    yield new_name, data_torch
+            return
+
+        new_name = self.map_tensor_name(name)
+
+        if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
+            data_torch = data_torch.squeeze()
+
+        if name.endswith(".A_log"):
+            logger.debug("A_log --> A ==> " + new_name)
+            data_torch = -torch.exp(data_torch)
+
+        yield (new_name, data_torch)
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("CohereForCausalLM")
 class CommandR2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.COMMAND_R
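
Note on the conversion logic above: Jamba interleaves full-attention layers with Mamba layers, so set_gguf_parameters() writes a per-layer KV-head count (attention layers get n_kv_head, Mamba layers get 0), and dt_rank falls back to ceil(d_model / 16) via a negative floor division. Below is a minimal standalone sketch of those two computations; every value is an illustrative assumption (roughly Jamba-v0.1-sized), not read from a real checkpoint.

    # Sketch only: the hyper-parameter values below are assumed for illustration.
    d_model = 4096      # assumed hidden_size
    n_kv_head = 8       # assumed num_key_value_heads
    attn_offset = 4     # assumed attn_layer_offset
    attn_period = 8     # assumed attn_layer_period
    block_count = 32    # assumed number of layers

    # ceiling division without math.ceil: -(a // -b) == ceil(a / b) for positive ints
    dt_rank = -(d_model // -16)

    # 0 KV heads for Mamba layers, n_kv_head for each interleaved attention layer
    n_kv_vec = [0 for _ in range(attn_offset)] + [
        n_kv_head if (i - attn_offset) % attn_period == 0 else 0
        for i in range(attn_offset, block_count)
    ]

    print(dt_rank)   # 256
    print(n_kv_vec)  # [0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, ...]

With the class registered, a Jamba checkpoint should then convert through the usual CLI, e.g. python convert_hf_to_gguf.py <model_dir> --outfile jamba.gguf --outtype f16.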

gguf-py/gguf/constants.py

Lines changed: 36 additions & 0 deletions
@@ -332,6 +332,7 @@ class MODEL_ARCH(IntEnum):
     ARWKV7 = auto()
     MAMBA = auto()
     MAMBA2 = auto()
+    JAMBA = auto()
     BAMBA = auto()
     XVERSE = auto()
     COMMAND_R = auto()
@@ -434,7 +435,10 @@ class MODEL_TENSOR(IntEnum):
     SSM_CONV1D = auto()
     SSM_X = auto()
     SSM_DT = auto()
+    SSM_DT_NORM = auto()
     SSM_A = auto()
+    SSM_B_NORM = auto()
+    SSM_C_NORM = auto()
     SSM_D = auto()
     SSM_NORM = auto()
     SSM_OUT = auto()
@@ -637,6 +641,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.ARWKV7: "arwkv7",
     MODEL_ARCH.MAMBA: "mamba",
     MODEL_ARCH.MAMBA2: "mamba2",
+    MODEL_ARCH.JAMBA: "jamba",
     MODEL_ARCH.BAMBA: "bamba",
     MODEL_ARCH.XVERSE: "xverse",
     MODEL_ARCH.COMMAND_R: "command-r",
@@ -739,7 +744,10 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
     MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
     MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_B_NORM: "blk.{bid}.ssm_b_norm",
+    MODEL_TENSOR.SSM_C_NORM: "blk.{bid}.ssm_c_norm",
     MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
     MODEL_TENSOR.SSM_NORM: "blk.{bid}.ssm_norm",
     MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
@@ -1739,6 +1747,34 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.SSM_NORM,
         MODEL_TENSOR.SSM_OUT,
     ],
+    MODEL_ARCH.JAMBA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_DT_NORM,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_B_NORM,
+        MODEL_TENSOR.SSM_C_NORM,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.BAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
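
Quick sanity check for the constants added above (a sketch, assuming a gguf-py build that already includes this change):

    import gguf

    arch = gguf.MODEL_ARCH.JAMBA
    print(gguf.MODEL_ARCH_NAMES[arch])                                     # "jamba"
    print(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.SSM_DT_NORM].format(bid=0))  # "blk.0.ssm_dt_norm"
    print(gguf.MODEL_TENSOR.SSM_B_NORM in gguf.MODEL_TENSORS[arch])        # True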

gguf-py/gguf/tensor_mapping.py

Lines changed: 41 additions & 20 deletions
@@ -279,6 +279,8 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.rms_norm_2",  # Grok
             "encoder.layers.{bid}.post_attention_layernorm",  # chatglm
             "transformer.layers.{bid}.ffn_norm",  # openelm
+            "model.layers.{bid}.pre_ff_layernorm",  # jamba
+            "model.layers.{bid}.pre_moe_layernorm",  # mini-jamba
             "model.layers.{bid}.post_attention_layernorm",  # llama4
             "transformer_encoder.{bid}.ffn_norm",  # neobert
             "model.layers.{bid}.pre_ff_layernorm",  # bamba
@@ -301,6 +303,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate",  # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router",  # Grok
             "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
+            "model.layers.{bid}.feed_forward.router",  # jamba
             "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
             "model.layers.{bid}.feed_forward.router",  # llama4
             "encoder.layers.{bid}.mlp.router.layer",  # nomic-bert-moe
@@ -343,6 +346,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers",  # jina-bert-v2 (GEGLU)
             "encoder.layer.{bid}.mlp.up_gated_layer",  # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",  # arctic
+            "model.layers.{bid}.feed_forward.up_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",  # exaone
             "model.layers.{bid}.feed_forward.up_proj",  # llama4
@@ -383,6 +387,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",  # refact
             "model.layers.{bid}.residual_mlp.w1",  # arctic
+            "model.layers.{bid}.feed_forward.gate_proj",  # jamba
             "transformer.h.{bid}.mlp.c_fc_0",  # exaone
             "language_model.model.layers.{bid}.feed_forward.gate_proj",  # llama4
             "model.layers.{bid}.feed_forward.gate_proj",  # bamba
@@ -428,6 +433,7 @@ class TensorNameMap:
             "transformer.layers.{bid}.ffn.proj_2",  # openelm
             "model.layers.{bid}.residual_mlp.w2",  # arctic
             "encoder.layer.{bid}.mlp.down_layer",  # jina-bert-v2
+            "model.layers.{bid}.feed_forward.down_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_4h_to_h",  # chatglm
             "model.layers.h.{bid}.mlp.c_proj",  # exaone
             "model.layers.{bid}.feed_forward.down_proj",  # llama4
@@ -549,38 +555,53 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.SSM_IN: (
-            "model.layers.{bid}.in_proj",
-            "backbone.layers.{bid}.mixer.in_proj",
-            "model.layers.{bid}.mamba.in_proj",  # bamba
+            "model.layers.{bid}.in_proj",  # mamba-hf
+            "backbone.layers.{bid}.mixer.in_proj",  # mamba
+            "model.layers.{bid}.mamba.in_proj",  # jamba, bamba
         ),

         MODEL_TENSOR.SSM_CONV1D: (
-            "model.layers.{bid}.conv1d",
-            "backbone.layers.{bid}.mixer.conv1d",
-            "model.layers.{bid}.mamba.conv1d",  # bamba
+            "model.layers.{bid}.conv1d",  # mamba-hf
+            "backbone.layers.{bid}.mixer.conv1d",  # mamba
+            "model.layers.{bid}.mamba.conv1d",  # jamba, bamba
         ),

         MODEL_TENSOR.SSM_X: (
-            "model.layers.{bid}.x_proj",
-            "backbone.layers.{bid}.mixer.x_proj",
+            "model.layers.{bid}.x_proj",  # mamba-hf
+            "backbone.layers.{bid}.mixer.x_proj",  # mamba
+            "model.layers.{bid}.mamba.x_proj",  # jamba
         ),

         MODEL_TENSOR.SSM_DT: (
-            "model.layers.{bid}.dt_proj",
-            "backbone.layers.{bid}.mixer.dt_proj",
-            "model.layers.{bid}.mamba.dt_proj",  # bamba
+            "model.layers.{bid}.dt_proj",  # mamba-hf
+            "backbone.layers.{bid}.mixer.dt_proj",  # mamba
+            "model.layers.{bid}.mamba.dt_proj",  # jamba, bamba
+        ),
+
+        MODEL_TENSOR.SSM_DT_NORM: (
+            "model.layers.{bid}.mamba.dt_layernorm",  # jamba
         ),

         MODEL_TENSOR.SSM_A: (
-            "model.layers.{bid}.A_log",
-            "backbone.layers.{bid}.mixer.A_log",
-            "model.layers.{bid}.mamba.A_log",  # bamba
+            "model.layers.{bid}.A_log",  # mamba-hf
+            "backbone.layers.{bid}.mixer.A_log",  # mamba
+            "model.layers.{bid}.mamba.A_log",  # jamba, bamba
+        ),
+
+        MODEL_TENSOR.SSM_B_NORM: (
+            "model.layers.{bid}.mamba.b_layernorm",  # jamba
+            "model.layers.{bid}.mamba.B_layernorm",  # mini-jamba
+        ),
+
+        MODEL_TENSOR.SSM_C_NORM: (
+            "model.layers.{bid}.mamba.c_layernorm",  # jamba
+            "model.layers.{bid}.mamba.C_layernorm",  # mini-jamba
         ),

         MODEL_TENSOR.SSM_D: (
-            "model.layers.{bid}.D",
-            "backbone.layers.{bid}.mixer.D",
-            "model.layers.{bid}.mamba.D",  # bamba
+            "model.layers.{bid}.D",  # mamba-hf
+            "backbone.layers.{bid}.mixer.D",  # mamba
+            "model.layers.{bid}.mamba.D",  # jamba, bamba
         ),

         MODEL_TENSOR.SSM_NORM: (
@@ -589,9 +610,9 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.SSM_OUT: (
-            "model.layers.{bid}.out_proj",
-            "backbone.layers.{bid}.mixer.out_proj",
-            "model.layers.{bid}.mamba.out_proj",  # bamba
+            "model.layers.{bid}.out_proj",  # mamba-hf
+            "backbone.layers.{bid}.mixer.out_proj",  # mamba
+            "model.layers.{bid}.mamba.out_proj",  # jamba, bamba
         ),

         MODEL_TENSOR.TIME_MIX_W0: (
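
These mappings are what convert_hf_to_gguf.py's map_tensor_name() resolves against when translating Hugging Face tensor names into the GGUF names declared in constants.py. A small sketch of how the new Jamba entries resolve, assuming a gguf-py build with this change (the block count of 32 is only illustrative):

    import gguf

    tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.JAMBA, 32)

    print(tmap.get_name("model.layers.0.mamba.dt_layernorm.weight", try_suffixes=(".weight",)))
    # expected: "blk.0.ssm_dt_norm.weight"
    print(tmap.get_name("model.layers.4.mamba.x_proj.weight", try_suffixes=(".weight",)))
    # expected: "blk.4.ssm_x.weight"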

src/llama-arch.cpp

Lines changed: 36 additions & 0 deletions
@@ -46,6 +46,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_STARCODER2, "starcoder2" },
     { LLM_ARCH_MAMBA, "mamba" },
     { LLM_ARCH_MAMBA2, "mamba2" },
+    { LLM_ARCH_JAMBA, "jamba" },
     { LLM_ARCH_BAMBA, "bamba" },
     { LLM_ARCH_XVERSE, "xverse" },
     { LLM_ARCH_COMMAND_R, "command-r" },
@@ -1024,6 +1025,37 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
         },
     },
+    {
+        LLM_ARCH_JAMBA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
+            { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_BAMBA,
         {
@@ -1844,6 +1876,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
     {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
+    {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
@@ -1992,6 +2027,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {

 bool llm_arch_is_hybrid(const llm_arch & arch) {
     switch (arch) {
+        case LLM_ARCH_JAMBA:
         case LLM_ARCH_BAMBA:
         case LLM_ARCH_GRANITE_MOE_HYBRID:
             return true;
