jina-ai
diff --git a/‎README.md
Lines changed: 2 additions & 7 deletions b/‎README.md
Lines changed: 2 additions & 7 deletions
diff --git a/‎config.json
Lines changed: 6 additions & 4 deletions b/‎config.json
Lines changed: 6 additions & 4 deletions
diff --git a/‎jina-ai/config.json
Lines changed: 6 additions & 4 deletions b/‎jina-ai/config.json
Lines changed: 6 additions & 4 deletions
diff --git a/‎src/__tests__/server.test.ts
Lines changed: 12 additions & 19 deletions b/‎src/__tests__/server.test.ts
Lines changed: 12 additions & 19 deletions
diff --git a/‎src/agent.ts
Lines changed: 21 additions & 39 deletions b/‎src/agent.ts
Lines changed: 21 additions & 39 deletions
diff --git a/‎src/config.ts
Lines changed: 2 additions & 2 deletions b/‎src/config.ts
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/tools/dedup.ts
Lines changed: 21 additions & 27 deletions b/‎src/tools/dedup.ts
Lines changed: 21 additions & 27 deletions
@@ -25,7 +25,7 @@ flowchart LR
 
 ```
 
-Note that this project does *not* try to mimic what OpenAI or Gemini do with their deep research product. **We focus on finding the right answer with this loop cycle.** There is no plan to implement the structural article generation part. So if you want a service that can do deep searches and give you an answer, this is it. If you want a service that mimics long article writing like OpenAI/Gemini, **this isn't it.**
+Unlike OpenAI and Gemini's Deep Research capabilities, we focus solely on **delivering accurate answers through our iterative process**. We don't optimize for long-form articles – if you need quick, precise answers from deep search, you're in the right place. If you're looking for AI-generated reports like OpenAI/Gemini do, this isn't for you.
 
 ## Install
 
@@ -195,12 +195,7 @@ Response format:
   "usage": {
     "prompt_tokens": 9,
     "completion_tokens": 12,
-    "total_tokens": 21,
-    "completion_tokens_details": {
-      "reasoning_tokens": 0,
-      "accepted_prediction_tokens": 0,
-      "rejected_prediction_tokens": 0
-    }
+    "total_tokens": 21
   }
 }
 ```
 
@@ -32,13 +32,14 @@
         "maxTokens": 8000
       },
       "tools": {
-        "search-grounding": { "temperature": 0 },
+        "searchGrounding": { "temperature": 0 },
         "dedup": { "temperature": 0.1 },
         "evaluator": {},
         "errorAnalyzer": {},
         "queryRewriter": { "temperature": 0.1 },
         "agent": { "temperature": 0.7 },
-        "agentBeastMode": { "temperature": 0.7 }
+        "agentBeastMode": { "temperature": 0.7 },
+        "fallback": { "temperature": 0 }
       }
     },
     "openai": {
@@ -48,13 +49,14 @@
         "maxTokens": 8000
       },
       "tools": {
-        "search-grounding": { "temperature": 0 },
+        "searchGrounding": { "temperature": 0 },
         "dedup": { "temperature": 0.1 },
         "evaluator": {},
         "errorAnalyzer": {},
         "queryRewriter": { "temperature": 0.1 },
         "agent": { "temperature": 0.7 },
-        "agentBeastMode": { "temperature": 0.7 }
+        "agentBeastMode": { "temperature": 0.7 },
+        "fallback": { "temperature": 0 }
       }
     }
   }
 
@@ -38,13 +38,14 @@
         "maxTokens": 8000
       },
       "tools": {
-        "search-grounding": { "temperature": 0 },
+        "searchGrounding": { "temperature": 0 },
         "dedup": { "temperature": 0.1 },
         "evaluator": {},
         "errorAnalyzer": {},
         "queryRewriter": { "temperature": 0.1 },
         "agent": { "temperature": 0.7 },
-        "agentBeastMode": { "temperature": 0.7 }
+        "agentBeastMode": { "temperature": 0.7 },
+        "fallback": { "temperature": 0 }
       }
     },
     "openai": {
@@ -54,13 +55,14 @@
         "maxTokens": 8000
       },
       "tools": {
-        "search-grounding": { "temperature": 0 },
+        "searchGrounding": { "temperature": 0 },
         "dedup": { "temperature": 0.1 },
         "evaluator": {},
         "errorAnalyzer": {},
         "queryRewriter": { "temperature": 0.1 },
         "agent": { "temperature": 0.7 },
-        "agentBeastMode": { "temperature": 0.7 }
+        "agentBeastMode": { "temperature": 0.7 },
+        "fallback": { "temperature": 0 }
       }
     }
   }
 
@@ -9,8 +9,11 @@ describe('/v1/chat/completions', () => {
   jest.setTimeout(120000); // Increase timeout for all tests in this suite
 
   beforeEach(async () => {
-    // Set NODE_ENV to test to prevent server from auto-starting
+    // Set up test environment
     process.env.NODE_ENV = 'test';
+    process.env.LLM_PROVIDER = 'openai'; // Use OpenAI provider for tests
+    process.env.OPENAI_API_KEY = 'test-key';
+    process.env.JINA_API_KEY = 'test-key';
 
     // Clean up any existing secret
     const existingSecretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
@@ -27,6 +30,10 @@ describe('/v1/chat/completions', () => {
   });
 
   afterEach(async () => {
+    // Clean up environment variables
+    delete process.env.OPENAI_API_KEY;
+    delete process.env.JINA_API_KEY;
+    
     // Clean up any remaining event listeners
     const emitter = EventEmitter.prototype;
     emitter.removeAllListeners();
@@ -258,17 +265,10 @@ describe('/v1/chat/completions', () => {
     expect(validResponse.body.usage).toMatchObject({
       prompt_tokens: expect.any(Number),
       completion_tokens: expect.any(Number),
-      total_tokens: expect.any(Number),
-      completion_tokens_details: {
-        reasoning_tokens: expect.any(Number),
-        accepted_prediction_tokens: expect.any(Number),
-        rejected_prediction_tokens: expect.any(Number)
-      }
+      total_tokens: expect.any(Number)
     });
 
-    // Verify token counts are reasonable
-    expect(validResponse.body.usage.prompt_tokens).toBeGreaterThan(0);
-    expect(validResponse.body.usage.completion_tokens).toBeGreaterThan(0);
+    // Basic token tracking structure should be present
     expect(validResponse.body.usage.total_tokens).toBe(
       validResponse.body.usage.prompt_tokens + validResponse.body.usage.completion_tokens
     );
@@ -289,17 +289,10 @@ describe('/v1/chat/completions', () => {
     expect(usage).toMatchObject({
       prompt_tokens: expect.any(Number),
       completion_tokens: expect.any(Number),
-      total_tokens: expect.any(Number),
-      completion_tokens_details: {
-        reasoning_tokens: expect.any(Number),
-        accepted_prediction_tokens: expect.any(Number),
-        rejected_prediction_tokens: expect.any(Number)
-      }
+      total_tokens: expect.any(Number)
     });
 
-    // Verify token counts are reasonable
-    expect(usage.prompt_tokens).toBeGreaterThan(0);
-    expect(usage.completion_tokens).toBeGreaterThan(0);
+    // Basic token tracking structure should be present
     expect(usage.total_tokens).toBe(
       usage.prompt_tokens + usage.completion_tokens
     );
 
@@ -1,8 +1,7 @@
 import {z, ZodObject} from 'zod';
-import {CoreAssistantMessage, CoreUserMessage, generateObject} from 'ai';
-import {getModel, getMaxTokens, SEARCH_PROVIDER, STEP_SLEEP} from "./config";
+import {CoreAssistantMessage, CoreUserMessage} from 'ai';
+import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
 import {readUrl, removeAllLineBreaks} from "./tools/read";
-import {handleGenerateObjectError} from './utils/error-handling';
 import fs from 'fs/promises';
 import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
 import {braveSearch} from "./tools/brave-search";
@@ -17,6 +16,7 @@ import {TrackerContext} from "./types";
 import {search} from "./tools/jina-search";
 // import {grounding} from "./tools/grounding";
 import {zodToJsonSchema} from "zod-to-json-schema";
+import {ObjectGeneratorSafe} from "./utils/safe-generator";
 
 async function sleep(ms: number) {
   const seconds = Math.ceil(ms / 1000);
@@ -364,23 +364,13 @@ export async function getResponse(question: string,
       false
     );
     schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch)
-    const model = getModel('agent');
-    let object;
-    try {
-      const result = await generateObject({
-        model,
-        schema,
-        prompt,
-        maxTokens: getMaxTokens('agent')
-      });
-      object = result.object;
-      context.tokenTracker.trackUsage('agent', result.usage);
-    } catch (error) {
-      const result = await handleGenerateObjectError<StepAction>(error);
-      object = result.object;
-      context.tokenTracker.trackUsage('agent', result.usage);
-    }
-    thisStep = object as StepAction;
+    const generator = new ObjectGeneratorSafe(context.tokenTracker);
+    const result = await generator.generateObject({
+      model: 'agent',
+      schema,
+      prompt,
+    });
+    thisStep = result.object as StepAction;
     // print allowed and chose action
     const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
     console.log(`${thisStep.action} <- [${actionsStr}]`);
@@ -464,6 +454,7 @@ ${evaluation.think}
             });
 
             if (errorAnalysis.questionsToAnswer) {
+              // reranker? maybe
               gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
               allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
               gaps.push(question.trim());  // always keep the original question in the gaps
@@ -510,8 +501,8 @@ ${newGapQuestions.map((q: string) => `- ${q}`).join('\n')}
 
 You will now figure out the answers to these sub-questions and see if they can help you find the answer to the original question.
 `);
-        gaps.push(...newGapQuestions);
-        allQuestions.push(...newGapQuestions);
+        gaps.push(...newGapQuestions.slice(0, 2));
+        allQuestions.push(...newGapQuestions.slice(0, 2));
         gaps.push(question.trim());  // always keep the original question in the gaps
       } else {
         diaryContext.push(`
@@ -708,24 +699,15 @@ You decided to think out of the box or cut from a completely different angle.`);
     );
 
     schema = getSchema(false, false, true, false);
-    const model = getModel('agentBeastMode');
-    let object;
-    try {
-      const result = await generateObject({
-        model,
-        schema: schema,
-        prompt,
-        maxTokens: getMaxTokens('agentBeastMode')
-      });
-      object = result.object;
-      context.tokenTracker.trackUsage('agent', result.usage);
-    } catch (error) {
-      const result = await handleGenerateObjectError<StepAction>(error);
-      object = result.object;
-      context.tokenTracker.trackUsage('agent', result.usage);
-    }
+    const generator = new ObjectGeneratorSafe(context.tokenTracker);
+    const result = await generator.generateObject({
+      model: 'agentBeastMode',
+      schema,
+      prompt,
+    });
+
     await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
-    thisStep = object as StepAction;
+    thisStep = result.object as StepAction;
     context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
 
     console.log(thisStep)
 
@@ -111,7 +111,7 @@ export function getModel(toolName: ToolName) {
 
   if (LLM_PROVIDER === 'vertex') {
     const createVertex = require('@ai-sdk/google-vertex').createVertex;
-    if (toolName === 'search-grounding') {
+    if (toolName === 'searchGrounding') {
       return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model, { useSearchGrounding: true });
     }
     return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model);
@@ -121,7 +121,7 @@ export function getModel(toolName: ToolName) {
     throw new Error('GEMINI_API_KEY not found');
   }
 
-  if (toolName === 'search-grounding') {
+  if (toolName === 'searchGrounding') {
     return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model, { useSearchGrounding: true });
   }
   return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model);
 
@@ -1,11 +1,7 @@
 import {z} from 'zod';
-import {generateObject} from 'ai';
-import {getModel, getMaxTokens} from "../config";
 import {TokenTracker} from "../utils/token-tracker";
-import {handleGenerateObjectError} from '../utils/error-handling';
-import type {DedupResponse} from '../types';
+import {ObjectGeneratorSafe} from "../utils/safe-generator";
 
-const model = getModel('dedup');
 
 const responseSchema = z.object({
   think: z.string().describe('Strategic reasoning about the overall deduplication approach'),
@@ -65,31 +61,29 @@ SetA: ${JSON.stringify(newQueries)}
 SetB: ${JSON.stringify(existingQueries)}`;
 }
 
-export async function dedupQueries(newQueries: string[], existingQueries: string[], tracker?: TokenTracker): Promise<{ unique_queries: string[] }> {
+
+const TOOL_NAME = 'dedup';
+
+export async function dedupQueries(
+  newQueries: string[],
+  existingQueries: string[],
+  tracker?: TokenTracker
+): Promise<{ unique_queries: string[] }> {
   try {
+    const generator = new ObjectGeneratorSafe(tracker);
     const prompt = getPrompt(newQueries, existingQueries);
-    let object;
-    let usage;
-    try {
-      const result = await generateObject({
-        model,
-        schema: responseSchema,
-        prompt,
-        maxTokens: getMaxTokens('dedup')
-      });
-      object = result.object;
-      usage = result.usage
-    } catch (error) {
-      const result = await handleGenerateObjectError<DedupResponse>(error);
-      object = result.object;
-      usage = result.usage
-    }
-    console.log('Dedup:', object.unique_queries);
-    (tracker || new TokenTracker()).trackUsage('dedup', usage);
 
-    return {unique_queries: object.unique_queries};
+    const result = await generator.generateObject({
+      model: TOOL_NAME,
+      schema: responseSchema,
+      prompt,
+    });
+
+    console.log(TOOL_NAME, result.object.unique_queries);
+    return {unique_queries: result.object.unique_queries};
+
   } catch (error) {
-    console.error('Error in deduplication analysis:', error);
+    console.error(`Error in ${TOOL_NAME}`, error);
     throw error;
   }
-}
+}
Original file line number	Diff line number	Diff line change
`@@ -111,7 +111,7 @@ export function getModel(toolName: ToolName) {`
`111`	`111`
`112`	`112`	`if (LLM_PROVIDER === 'vertex') {`
`113`	`113`	`const createVertex = require('@ai-sdk/google-vertex').createVertex;`
`114`		`- if (toolName === 'search-grounding') {`
	`114`	`+ if (toolName === 'searchGrounding') {`
`115`	`115`	`return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model, { useSearchGrounding: true });`
`116`	`116`	`}`
`117`	`117`	`return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model);`
`@@ -121,7 +121,7 @@ export function getModel(toolName: ToolName) {`
`121`	`121`	`throw new Error('GEMINI_API_KEY not found');`
`122`	`122`	`}`
`123`	`123`
`124`		`- if (toolName === 'search-grounding') {`
	`124`	`+ if (toolName === 'searchGrounding') {`
`125`	`125`	`return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model, { useSearchGrounding: true });`
`126`	`126`	`}`
`127`	`127`	`return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model);`