
Commit d17b7d5

Enable prompt caching for Amazon Nova by default (#1068)
1 parent 39a87cd commit d17b7d5

File tree: 3 files changed, +73 −10 lines changed

packages/cdk/lambda/utils/models.ts
packages/cdk/lambda/utils/promptCache.ts
packages/types/src/text.d.ts

packages/cdk/lambda/utils/models.ts

Lines changed: 64 additions & 7 deletions
@@ -190,6 +190,13 @@ const NOVA_DEFAULT_PARAMS: ConverseInferenceParams = {
     temperature: 0.7,
     topP: 0.9,
   },
+  // There are no additional costs for cache writes with Amazon Nova models
+  promptCachingConfig: {
+    autoCacheFields: {
+      system: true,
+      messages: true,
+    },
+  },
 };
 
 const DEEPSEEK_DEFAULT_PARAMS: ConverseInferenceParams = {
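Note: `autoCacheFields` changes shape here from a string array to a per-field boolean map (the full type change is in packages/types/src/text.d.ts below). The map makes "explicitly off" representable, which the array could not express. A minimal sketch of the two encodings, using simplified standalone types:

// Old encoding: presence in the array means "auto-cache this field".
const oldStyle: ('system' | 'messages' | 'tools')[] = ['system', 'messages'];

// New encoding: each field is true (cache), false (explicitly off),
// or absent (fall through to whatever a lower-priority config says).
const newStyle: { system?: boolean; messages?: boolean; tools?: boolean } = {
  system: true,
  messages: true,
};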
@@ -211,17 +218,42 @@ const PALMYRA_DEFAULT_PARAMS: ConverseInferenceParams = {
 const USECASE_DEFAULT_PARAMS: UsecaseConverseInferenceParams = {
   '/chat': {
     promptCachingConfig: {
-      autoCacheFields: ['system', 'messages'],
+      autoCacheFields: {
+        system: true,
+        messages: true,
+      },
     },
   },
   '/rag': {
     inferenceConfig: {
       temperature: 0.0,
     },
+    promptCachingConfig: {
+      autoCacheFields: {
+        system: false,
+      },
+    },
   },
   '/diagram': {
     promptCachingConfig: {
-      autoCacheFields: ['system'],
+      autoCacheFields: {
+        system: true,
+      },
+    },
+  },
+  '/use-case-builder': {
+    promptCachingConfig: {
+      autoCacheFields: {
+        messages: false,
+      },
+    },
+  },
+  '/title': {
+    promptCachingConfig: {
+      autoCacheFields: {
+        system: false,
+        messages: false,
+      },
     },
   },
 };
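Note: under the merge semantics introduced further down in this file (`mergeConverseInferenceParams`), these per-use-case maps layer over the model defaults. A rough sketch of the expected resolution for an Amazon Nova model serving '/rag', with plain object spreads standing in for the helper:

const novaAutoCache = { system: true, messages: true }; // from NOVA_DEFAULT_PARAMS
const ragAutoCache = { system: false };                 // '/rag' override

const merged = { ...novaAutoCache, ...ragAutoCache };
// -> { system: false, messages: true }
// RAG skips caching the system prompt but still caches the conversation.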
@@ -263,6 +295,11 @@ const createGuardrailStreamConfig = ():
 const idTransformationRules = [
   // Chat history -> Chat
   { pattern: /^\/chat\/.+/, replacement: '/chat' },
+  // Use case builder (/new and /execute/*)
+  {
+    pattern: /^\/use-case-builder\/.+/,
+    replacement: '/use-case-builder',
+  },
 ];
 
 // ID conversion
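Note: the new rule folds the builder routes (/use-case-builder/new and /use-case-builder/execute/<id>) onto the '/use-case-builder' key used in USECASE_DEFAULT_PARAMS. The body of `normalizeId` is not shown in this diff; a hypothetical standalone equivalent of the rule application, for illustration only:

const rules = [
  { pattern: /^\/chat\/.+/, replacement: '/chat' },
  { pattern: /^\/use-case-builder\/.+/, replacement: '/use-case-builder' },
];

const normalize = (id: string): string =>
  rules.reduce((acc, rule) => acc.replace(rule.pattern, rule.replacement), id);

const a = normalize('/use-case-builder/execute/abc'); // '/use-case-builder'
const b = normalize('/chat/12345');                   // '/chat'
const c = normalize('/rag');                          // '/rag' (no rule matches)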
@@ -273,6 +310,23 @@ function normalizeId(id: string): string {
   return ret;
 }
 
+const mergeConverseInferenceParams = (
+  a: ConverseInferenceParams,
+  b: ConverseInferenceParams
+) =>
+  ({
+    inferenceConfig: {
+      ...a.inferenceConfig,
+      ...b.inferenceConfig,
+    },
+    promptCachingConfig: {
+      autoCacheFields: {
+        ...a.promptCachingConfig?.autoCacheFields,
+        ...b.promptCachingConfig?.autoCacheFields,
+      },
+    },
+  }) as ConverseInferenceParams;
+
 // API call, extract string from output, etc.
 
 const createConverseCommandInput = (
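Note: the old `{ ...a, ...b }` merge at the call site (replaced in the next hunk) was shallow, so a use case that set only one nested key would wipe out the model's other nested defaults. A self-contained sketch of the difference, with invented values:

const defaults = {
  inferenceConfig: { temperature: 0.7, topP: 0.9 },
  promptCachingConfig: { autoCacheFields: { system: true, messages: true } },
};
const usecase = {
  inferenceConfig: { temperature: 0.0 },
  promptCachingConfig: { autoCacheFields: { system: false } },
};

const shallow = { ...defaults, ...usecase };
// shallow.inferenceConfig -> { temperature: 0.0 }            (topP lost)
// shallow.promptCachingConfig.autoCacheFields -> { system: false } (messages lost)

const deep = {
  inferenceConfig: { ...defaults.inferenceConfig, ...usecase.inferenceConfig },
  promptCachingConfig: {
    autoCacheFields: {
      ...defaults.promptCachingConfig.autoCacheFields,
      ...usecase.promptCachingConfig.autoCacheFields,
    },
  },
};
// deep.inferenceConfig -> { temperature: 0.0, topP: 0.9 }
// deep.promptCachingConfig.autoCacheFields -> { system: false, messages: true }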
@@ -351,14 +405,17 @@ const createConverseCommandInput = (
 
   // Merge model's default params with use-case specific ones
   const usecaseParams = usecaseConverseInferenceParams[normalizeId(id)] || {};
-  const params = { ...defaultConverseInferenceParams, ...usecaseParams };
+  const params = mergeConverseInferenceParams(
+    defaultConverseInferenceParams,
+    usecaseParams
+  );
 
   // Apply prompt caching
-  const autoCacheFields = params.promptCachingConfig?.autoCacheFields || [];
-  const conversationWithCache = autoCacheFields.includes('messages')
+  const autoCacheFields = params.promptCachingConfig?.autoCacheFields || {};
+  const conversationWithCache = autoCacheFields['messages']
     ? applyAutoCacheToMessages(conversation, model.modelId)
     : conversation;
-  const systemContextWithCache = autoCacheFields.includes('system')
+  const systemContextWithCache = autoCacheFields['system']
     ? applyAutoCacheToSystem(systemContext, model.modelId)
     : systemContext;
 
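Note the gate change: with the array shape, `.includes('messages')` tested membership; with the map, the value must be truthy, so an explicit `false` and an absent key both disable caching. A tiny sketch with a simplified inline type:

const fields: { [k in 'system' | 'messages' | 'tools']?: boolean } = {
  system: false,
  messages: true,
};

const m = fields['messages'] ? 'cache' : 'skip'; // 'cache'
const s = fields['system'] ? 'cache' : 'skip';   // 'skip' (explicitly false)
const t = fields['tools'] ? 'cache' : 'skip';    // 'skip' (absent -> undefined)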
@@ -377,7 +434,7 @@ const createConverseCommandInput = (
     model.modelParameters?.reasoningConfig?.type === 'enabled'
   ) {
     converseCommandInput.inferenceConfig = {
-      ...(params.inferenceConfig || {}),
+      ...params.inferenceConfig,
       temperature: 1, // reasoning requires temperature to be 1
       topP: undefined, // reasoning does not require topP
       maxTokens:
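Note: dropping the `|| {}` guard is safe because spreading `undefined` (or `null`) inside an object literal is a no-op in JavaScript, so `...params.inferenceConfig` behaves identically when the config is absent:

const maybeConfig: { temperature?: number } | undefined = undefined;
const input = { ...maybeConfig, temperature: 1 }; // { temperature: 1 } — no error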

packages/cdk/lambda/utils/promptCache.ts

Lines changed: 6 additions & 2 deletions
@@ -32,12 +32,16 @@ export const applyAutoCacheToMessages = (
   const isToolsSupported = cacheFields.includes('tools');
   const cachableIndices = messages
     .map((message, index) => ({ message, index }))
-    .filter(({ message }) => message.role === 'user')
+    .filter(
+      ({ message }) =>
+        message.role === 'user' &&
+        !message.content?.some((block) => block.document || block.video)
+    )
     .filter(
       ({ message }) =>
         isToolsSupported ||
         // For Amazon Nova, placing cachePoint after toolResult is not supported
-        !message.content?.some((content) => content.toolResult)
+        !message.content?.some((block) => block.toolResult)
     )
     .slice(-2)
     .map(({ index }) => index);
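Note: the first filter now also skips user messages carrying document or video content blocks, keeping cache points off multimodal turns. The diff does not state the motivation, but it is consistent with cache points being unsupported or wasteful on such content. A simplified sketch of the predicate, with hand-rolled stand-ins for the Bedrock Message/ContentBlock types:

type Block = { text?: string; document?: object; video?: object; toolResult?: object };
type Msg = { role: 'user' | 'assistant'; content?: Block[] };

const isCachableUserMessage = (m: Msg): boolean =>
  m.role === 'user' && !m.content?.some((b) => b.document || b.video);

const a = isCachableUserMessage({ role: 'user', content: [{ text: 'hi' }] });      // true
const b = isCachableUserMessage({ role: 'user', content: [{ document: {} }] });    // false
const c = isCachableUserMessage({ role: 'assistant', content: [{ text: 'ok' }] }); // false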

packages/types/src/text.d.ts

Lines changed: 3 additions & 1 deletion
@@ -4,7 +4,9 @@ import { InferenceConfiguration } from '@aws-sdk/client-bedrock-runtime';
 
 export type PromptCacheField = 'messages' | 'system' | 'tools';
 export type PromptCachingConfig = {
-  autoCacheFields: PromptCacheField[];
+  autoCacheFields: {
+    [key in PromptCacheField]?: boolean;
+  };
 };
 
 // https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html#API_runtime_Converse_RequestSyntax
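For reference, a quick illustration of what the mapped type admits (types re-declared locally so the snippet stands alone):

type PromptCacheField = 'messages' | 'system' | 'tools';
type PromptCachingConfig = {
  autoCacheFields: { [key in PromptCacheField]?: boolean };
};

// Every field is optional: partial maps are valid, and `false` is now
// distinguishable from "not specified".
const full: PromptCachingConfig = {
  autoCacheFields: { system: true, messages: true, tools: false },
};
const partial: PromptCachingConfig = {
  autoCacheFields: { system: false },
};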
