@@ -13,6 +13,13 @@
 #include <vector>
 #include <ctime>
 
+#define LLAMA_ASSERT(condition, ...) do { \
+    if (!(condition)) { \
+        LOG_ERR(__VA_ARGS__); \
+        return 1; \
+    } \
+} while (0)
+
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
     size_t start = 0;
@@ -188,6 +195,9 @@ int main(int argc, char ** argv) {
     {
         LOG_INF("%s: Evaluating the system prompt ...\n", __func__);
 
+        LLAMA_ASSERT((batch.n_tokens + n_tokens_system < n_ctx),
+            "%s: Unable to add the system prompt (%d tokens) to the batch due to context overflow. "
+            "Consider increasing the context size (%d).\n", __func__, n_tokens_system, n_ctx);
         for (int32_t i = 0; i < n_tokens_system; ++i) {
             llama_batch_add(batch, tokens_system[i], i, { 0 }, false);
         }
@@ -223,6 +233,9 @@ int main(int argc, char ** argv) {
 
             client.i_batch = batch.n_tokens;
 
+            LLAMA_ASSERT((batch.n_tokens + 1 < n_ctx),
+                "%s: Unable to add client %d's sampled token to the batch due to context overflow. "
+                "Consider increasing the context size (found: %d).\n", __func__, client.id, n_ctx);
             llama_batch_add(batch, client.sampled, n_tokens_system + client.n_prompt + client.n_decoded, { client.id + 1 }, true);
 
             client.n_decoded += 1;
@@ -258,7 +271,11 @@ int main(int argc, char ** argv) {
                     std::vector<llama_token> tokens_prompt;
                     tokens_prompt = ::llama_tokenize(ctx, client.prompt, false);
 
-                    for (size_t i = 0; i < tokens_prompt.size(); ++i) {
+                    const size_t n_tokens_prompt = tokens_prompt.size();
+                    LLAMA_ASSERT((batch.n_tokens + n_tokens_prompt < (size_t) n_ctx),
+                        "%s: Unable to add client %d's prompt (%zu tokens) to the batch due to context overflow. "
+                        "Consider increasing the context size (found: %d).\n", __func__, client.id, n_tokens_prompt, n_ctx);
+                    for (size_t i = 0; i < n_tokens_prompt; ++i) {
                         llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);
                     }
 
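For reference, each of these checks follows the same pattern: verify that the tokens about to be queued still fit in the context window, log a printf-style error otherwise, and return early from main. Below is a minimal, standalone sketch of that guard, not the llama.cpp code itself: fprintf(stderr, ...) stands in for LOG_ERR, and the context/batch/prompt sizes are made-up numbers chosen so the check trips.

#include <cstdio>

// Standalone sketch of the early-return guard used in the diff above.
// fprintf(stderr, ...) stands in for LOG_ERR; the numbers are illustrative only.
#define LLAMA_ASSERT(condition, ...) do { \
    if (!(condition)) { \
        fprintf(stderr, __VA_ARGS__); \
        return 1; \
    } \
} while (0)

int main() {
    const int n_ctx           = 512; // assumed context size
    const int n_batch_tokens  = 500; // tokens already queued in the batch
    const int n_tokens_prompt = 64;  // tokens we would like to add

    LLAMA_ASSERT(n_batch_tokens + n_tokens_prompt < n_ctx,
        "%s: Unable to add the prompt (%d tokens) to the batch due to context overflow. "
        "Consider increasing the context size (%d).\n", __func__, n_tokens_prompt, n_ctx);

    printf("prompt fits in the context\n");
    return 0;
}

Wrapping the macro body in do { ... } while (0) and parenthesizing the condition keeps the expansion safe as a single statement (for example under an unbraced if/else) and avoids precedence surprises when callers pass compound expressions.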