
Commit 12abc41

add llava separator
1 parent 92afdfc commit 12abc41

File tree

1 file changed: +26, -3 lines changed


gpttype_adapter.cpp

Lines changed: 26 additions & 3 deletions
@@ -1163,7 +1163,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         auto er = llama_decode(llama_ctx_v4, llama_batch_get_one(tmp.data(), tmp.size(), 0, 0));
         if(er!=0)
         {
-            printf("\nLLAMA EVAL returned nonzero!\n");
+            printf("\nLLAMA EVAL returned nonzero: %d\n",er);
         }
         return ModelLoadResult::SUCCESS;
     }
@@ -1806,13 +1806,17 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     std::vector<int> embd_inp;
     std::vector<int> embd_inp_mem; //for storing added memory
     std::vector<int> llava_mem; //for storing dummy tokens that will be consumed by llava
+    std::vector<int> llava_sep; //to separate between different llava images
 
     int32_t nctx = kcpp_params->n_ctx;
 
     TokenizeString(kcpp_params->prompt, embd_inp, file_format);
 
     if(clp_ctx!=nullptr && clp_img_data!=nullptr)
     {
+        TokenizeString("\n\n", llava_sep, file_format,false);
+        int sepsize = llava_sep.size();
+
         for(int i=0;i<llava_images.size();++i)
         {
             std::string llava_image = llava_images[i].b64data;
@@ -1834,11 +1838,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             }
             if(llava_images[i].clp_image_tokens>0 && llava_images[i].clp_image_tokens < nctx)
             {
-                for(int n=0;n<llava_images[i].clp_image_tokens;++n)
+                int tokcnt = (i==0?(llava_images[i].clp_image_tokens):(llava_images[i].clp_image_tokens+sepsize));
+                for(int n=0;n<tokcnt;++n)
                 {
                     llava_mem.push_back(current_llava_identifier);
                 }
-            }else
+            }
+            else
             {
                 printf("\nWarning: LLAVA Image excluded - Context size too low or not enough clip tokens!\n");
             }
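To sanity-check the placeholder arithmetic in the hunk above: every image after the first now reserves its own CLIP token count plus the separator length, so the dummy tokens pushed into llava_mem match what will actually be decoded later. Below is a small standalone sketch with illustrative assumptions (576 CLIP tokens per image, a two-token "\n\n" separator; these are not values taken from the real model):

    // Standalone sketch of the placeholder accounting introduced in this commit.
    // The numbers are assumptions for illustration only.
    #include <cstdio>
    #include <vector>

    int main()
    {
        const int clp_image_tokens = 576; // assumed per-image CLIP embedding length
        const int sepsize = 2;            // assumed token count of "\n\n"
        const int num_images = 3;

        std::vector<int> llava_mem;
        for (int i = 0; i < num_images; ++i)
        {
            // Every image after the first also reserves room for the separator,
            // mirroring the tokcnt ternary in the hunk above.
            int tokcnt = (i == 0) ? clp_image_tokens : clp_image_tokens + sepsize;
            llava_mem.insert(llava_mem.end(), tokcnt, -1); // -1 stands in for the identifier token
        }

        // With the assumed numbers: 576 + 578 + 578 = 1732 placeholders in total.
        printf("reserved %zu placeholder tokens\n", llava_mem.size());
        return 0;
    }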
@@ -2387,6 +2393,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 //batch is empty, do image processing
                 int llavatokenscounted = 0;
                 int llavatokensevaled = 0;
+                int sepsize = llava_sep.size();
                 while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_B))
                 {
                     last_n_tokens.erase(last_n_tokens.begin());
@@ -2397,6 +2404,22 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 }
                 for(int i=0;i<llava_images.size();++i)
                 {
+                    if(i>0 && sepsize>0)
+                    {
+                        //add a separator between each image
+                        auto evr = llama_decode(llama_ctx_v4, llama_batch_get_one(llava_sep.data(), sepsize, n_past, 0));
+                        if(evr!=0)
+                        {
+                            printf("\nError when appending llava separator: %d\n",evr);
+                        }
+                        else
+                        {
+                            printf("\rProcessing LLaVa Separator (%d tokens)",sepsize);
+                        }
+                        n_past += sepsize;
+                        llavatokensevaled += sepsize;
+                    }
+
                     if(allow_regular_prints)
                     {
                         printf("\rProcessing LLaVa Embedding %d (%d tokens)",(i+1), llava_images[i].clp_image_tokens);
