@@ -288,101 +288,189 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
288
288
* information about the model's context window, maximum output tokens, and knowledge
289
289
* cutoff date.
290
290
* <p>
291
- * <b>References:</b>
292
- * <ul>
293
- * <li><a href="https://platform.openai.com/docs/models#gpt-4o">GPT-4o</a></li>
294
- * <li><a href="https://platform.openai.com/docs/models#gpt-4-and-gpt-4-turbo">GPT-4
295
- * and GPT-4 Turbo</a></li>
296
- * <li><a href="https://platform.openai.com/docs/models#gpt-3-5-turbo">GPT-3.5
297
- * Turbo</a></li>
298
- * <li><a href="https://platform.openai.com/docs/models#o1-and-o1-mini">o1 and
299
- * o1-mini</a></li>
300
- * <li><a href="https://platform.openai.com/docs/models#o3-mini">o3-mini</a></li>
301
- * </ul>
291
+ * <b>References:</b> <a href="https://platform.openai.com/docs/models">OpenAI Models
292
+ * Documentation</a>
302
293
*/
303
294
public enum ChatModel implements ChatModelDescription {
304
295
296
+ // --- Reasoning Models ---
297
+
305
298
/**
306
- * <b>o1 </b> is trained with reinforcement learning to perform complex reasoning.
307
- * It thinks before it answers, producing a long internal chain of thought before
308
- * responding to the user .
299
+ * <b>o4-mini</b> is the latest small o-series model. It's optimized for fast,
300
+ * effective reasoning with exceptionally efficient performance in coding and
301
+ * visual tasks.
309
302
* <p>
310
- * The latest o1 model supports both text and image inputs, and produces text
311
- * outputs (including Structured Outputs) .
303
+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
304
+ * cutoff: June 1, 2024.
312
305
* <p>
313
- * The knowledge cutoff for o1 is October, 2023.
306
+ * Model ID: o4-mini
314
307
* <p>
308
+ * See: <a href="https://platform.openai.com/docs/models/o4-mini">o4-mini</a>
315
309
*/
316
- O1 ("o1" ),
310
+ O4_MINI ("o4-mini" ),
311
+
317
312
/**
318
- * <b>o1-preview</b> is trained with reinforcement learning to perform complex
319
- * reasoning. It thinks before it answers, producing a long internal chain of
320
- * thought before responding to the user.
313
+ * <b>o3</b> is a well-rounded and powerful model across domains. It sets a new
314
+ * standard for math, science, coding, and visual reasoning tasks. It also excels
315
+ * at technical writing and instruction-following. Use it to think through
316
+ * multi-step problems that involve analysis across text, code, and images.
321
317
* <p>
322
- * The latest o1-preview model supports both text and image inputs, and produces
323
- * text outputs (including Structured Outputs) .
318
+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
319
+ * cutoff: June 1, 2024.
324
320
* <p>
325
- * The knowledge cutoff for o1-preview is October, 2023.
321
+ * Model ID: o3
326
322
* <p>
323
+ * See: <a href="https://platform.openai.com/docs/models/o3">o3</a>
327
324
*/
328
- O1_PREVIEW ("o1-preview" ),
325
+ O3 ("o3" ),
326
+
327
+ /**
328
+ * <b>o3-mini</b> is a small reasoning model, providing high intelligence at cost
329
+ * and latency targets similar to o1-mini. o3-mini supports key developer
330
+ * features, like Structured Outputs, function calling, Batch API.
331
+ * <p>
332
+ * The knowledge cutoff for o3-mini models is October, 2023.
333
+ * <p>
334
+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
335
+ * cutoff: October 1, 2023.
336
+ * <p>
337
+ * Model ID: o3-mini
338
+ * <p>
339
+ * See: <a href="https://platform.openai.com/docs/models/o3-mini">o3-mini</a>
340
+ */
341
+ O3_MINI ("o3-mini" ),
342
+
343
+ /**
344
+ * The <b>o1</b> series of models are trained with reinforcement learning to
345
+ * perform complex reasoning. o1 models think before they answer, producing a long
346
+ * internal chain of thought before responding to the user.
347
+ * <p>
348
+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
349
+ * cutoff: October 1, 2023.
350
+ * <p>
351
+ * Model ID: o1
352
+ * <p>
353
+ * See: <a href="https://platform.openai.com/docs/models/o1">o1</a>
354
+ */
355
+ O1 ("o1" ),
329
356
330
357
/**
331
358
* <b>o1-mini</b> is a faster and more affordable reasoning model compared to o1.
332
359
* o1-mini currently only supports text inputs and outputs.
333
360
* <p>
334
- * The knowledge cutoff for o1-mini is October, 2023.
361
+ * Context window: 128,000 tokens. Max output tokens: 65,536 tokens. Knowledge
362
+ * cutoff: October 1, 2023.
335
363
* <p>
364
+ * Model ID: o1-mini
365
+ * <p>
366
+ * See: <a href="https://platform.openai.com/docs/models/o1-mini">o1-mini</a>
336
367
*/
337
368
O1_MINI ("o1-mini" ),
369
+
338
370
/**
339
- * <b>o3-mini</b> is our most recent small reasoning model, providing high
340
- * intelligence at the same cost and latency targets of o1-mini. o3-mini also
341
- * supports key developer features, like Structured Outputs, function calling,
342
- * Batch API, and more. Like other models in the o-series, it is designed to excel
343
- * at science, math, and coding tasks.
371
+ * The <b>o1-pro</b> model, part of the o1 series trained with reinforcement
372
+ * learning for complex reasoning, uses more compute to think harder and provide
373
+ * consistently better answers.
344
374
* <p>
345
- * The knowledge cutoff for o3-mini models is October, 2023.
375
+ * Note: o1-pro is available in the Responses API only to enable support for
376
+ * multi-turn model interactions and other advanced API features.
377
+ * <p>
378
+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
379
+ * cutoff: October 1, 2023.
380
+ * <p>
381
+ * Model ID: o1-pro
346
382
* <p>
383
+ * See: <a href="https://platform.openai.com/docs/models/o1-pro">o1-pro</a>
347
384
*/
348
- O3_MINI ("o3-mini" ),
385
+ O1_PRO ("o1-pro" ),
386
+
387
+ // --- Flagship Models ---
349
388
350
389
/**
351
- * <b>GPT-4o ("omni")</b> is our versatile, high-intelligence flagship model. It
352
- * accepts both text and image inputs and produces text outputs (including
353
- * Structured Outputs).
390
+ * <b>GPT-4.1</b> is the flagship model for complex tasks. It is well suited for
391
+ * problem solving across domains.
392
+ * <p>
393
+ * Context window: 1,047,576 tokens. Max output tokens: 32,768 tokens. Knowledge
394
+ * cutoff: June 1, 2024.
354
395
* <p>
355
- * The knowledge cutoff for GPT-4o models is October, 2023.
396
+ * Model ID: gpt-4.1
356
397
* <p>
398
+ * See: <a href="https://platform.openai.com/docs/models/gpt-4.1">gpt-4.1</a>
399
+ */
400
+ GPT_4_1 ("gpt-4.1" ),
401
+
402
+ /**
403
+ * <b>GPT-4o</b> (“o” for “omni”) is the versatile, high-intelligence flagship
404
+ * model. It accepts both text and image inputs, and produces text outputs
405
+ * (including Structured Outputs). It is considered the best model for most tasks,
406
+ * and the most capable model outside of the o-series models.
407
+ * <p>
408
+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
409
+ * cutoff: October 1, 2023.
410
+ * <p>
411
+ * Model ID: gpt-4o
412
+ * <p>
413
+ * See: <a href="https://platform.openai.com/docs/models/gpt-4o">gpt-4o</a>
357
414
*/
358
415
GPT_4_O ("gpt-4o" ),
416
+
359
417
/**
360
418
* The <b>chatgpt-4o-latest</b> model ID continuously points to the version of
361
419
* GPT-4o used in ChatGPT. It is updated frequently when there are significant
362
420
* changes to ChatGPT's GPT-4o model.
363
421
* <p>
364
422
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
365
- * cutoff: October, 2023.
423
+ * cutoff: October 1, 2023.
424
+ * <p>
425
+ * Model ID: chatgpt-4o-latest
426
+ * <p>
427
+ * See: <a href=
428
+ * "https://platform.openai.com/docs/models/chatgpt-4o-latest">chatgpt-4o-latest</a>
366
429
*/
367
430
CHATGPT_4_O_LATEST ("chatgpt-4o-latest" ),
368
431
369
432
/**
370
- * <b>GPT-4o Audio</b> is a preview release model that accepts audio inputs and
371
- * outputs and can be used in the Chat Completions REST API.
433
+ * <b>GPT-4o Audio Preview</b> represents a preview release of models that accept
434
+ * audio inputs and outputs via the Chat Completions REST API.
435
+ * <p>
436
+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
437
+ * cutoff: October 1, 2023.
372
438
* <p>
373
- * The knowledge cutoff for GPT -4o Audio models is October, 2023.
439
+ * Model ID: gpt-4o-audio-preview
374
440
* <p>
441
+ * See: <a href=
442
+ * "https://platform.openai.com/docs/models/gpt-4o-audio-preview">gpt-4o-audio-preview</a>
375
443
*/
376
444
GPT_4_O_AUDIO_PREVIEW ("gpt-4o-audio-preview" ),
377
445
446
+ // --- Cost-Optimized Models ---
447
+
378
448
/**
379
- * <b>GPT-4o-mini Audio</b> is a preview release model that accepts audio inputs
380
- * and outputs and can be used in the Chat Completions REST API.
449
+ * <b>GPT-4.1-mini</b> provides a balance between intelligence, speed, and cost
450
+ * that makes it an attractive model for many use cases.
451
+ * <p>
452
+ * Context window: 1,047,576 tokens. Max output tokens: 32,768 tokens. Knowledge
453
+ * cutoff: June 1, 2024.
381
454
* <p>
382
- * The knowledge cutoff for GPT-4o -mini Audio models is October, 2023.
455
+ * Model ID: gpt-4.1-mini
383
456
* <p>
457
+ * See:
458
+ * <a href="https://platform.openai.com/docs/models/gpt-4.1-mini">gpt-4.1-mini</a>
384
459
*/
385
- GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
460
+ GPT_4_1_MINI ("gpt-4.1-mini" ),
461
+
462
+ /**
463
+ * <b>GPT-4.1-nano</b> is the fastest, most cost-effective GPT-4.1 model.
464
+ * <p>
465
+ * Context window: 1,047,576 tokens. Max output tokens: 32,768 tokens. Knowledge
466
+ * cutoff: June 1, 2024.
467
+ * <p>
468
+ * Model ID: gpt-4.1-nano
469
+ * <p>
470
+ * See:
471
+ * <a href="https://platform.openai.com/docs/models/gpt-4.1-nano">gpt-4.1-nano</a>
472
+ */
473
+ GPT_4_1_NANO ("gpt-4.1-nano" ),
386
474
387
475
/**
388
476
* <b>GPT-4o-mini</b> is a fast, affordable small model for focused tasks. It
@@ -391,80 +479,106 @@ public enum ChatModel implements ChatModelDescription {
391
479
* larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar
392
480
* results at lower cost and latency.
393
481
* <p>
394
- * The knowledge cutoff for GPT-4o-mini models is October, 2023.
482
+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
483
+ * cutoff: October 1, 2023.
484
+ * <p>
485
+ * Model ID: gpt-4o-mini
395
486
* <p>
487
+ * See:
488
+ * <a href="https://platform.openai.com/docs/models/gpt-4o-mini">gpt-4o-mini</a>
396
489
*/
397
490
GPT_4_O_MINI ("gpt-4o-mini" ),
398
491
399
492
/**
400
- * <b>GPT-4 Turbo</b> is a high-intelligence GPT model with vision capabilities,
401
- * usable in Chat Completions. Vision requests can now use JSON mode and function
402
- * calling.
493
+ * <b>GPT-4o-mini Audio Preview</b> is a preview release model that accepts audio
494
+ * inputs and outputs and can be used in the Chat Completions REST API.
403
495
* <p>
404
- * The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.
496
+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
497
+ * cutoff: October 1, 2023.
498
+ * <p>
499
+ * Model ID: gpt-4o-mini-audio-preview
405
500
* <p>
501
+ * See: <a href=
502
+ * "https://platform.openai.com/docs/models/gpt-4o-mini-audio-preview">gpt-4o-mini-audio-preview</a>
406
503
*/
407
- GPT_4_TURBO ("gpt-4-turbo" ),
504
+ GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
505
+
506
+ // --- Realtime Models ---
408
507
409
508
/**
410
- * <b>GPT-4-0125-preview</b> is the latest GPT-4 model intended to reduce cases of
411
- * “laziness” where the model doesn’t complete a task.
509
+ * <b>GPT-4o Realtime</b> model, is capable of responding to audio and text inputs
510
+ * in realtime over WebRTC or a WebSocket interface.
511
+ * <p>
512
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
513
+ * cutoff: October 1, 2023.
514
+ * <p>
515
+ * Model ID: gpt-4o-realtime-preview
412
516
* <p>
413
- * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
517
+ * See: <a href=
518
+ * "https://platform.openai.com/docs/models/gpt-4o-realtime-preview">gpt-4o-realtime-preview</a>
414
519
*/
415
- GPT_4_0125_PREVIEW ("gpt-4-0125 -preview" ),
520
+ GPT_4O_REALTIME_PREVIEW("gpt-4o-realtime-preview"),
416
521
417
522
/**
418
- * Currently points to {@link #GPT_4_0125_PREVIEW}.
523
+ * <b>GPT-4o-mini Realtime</b> model, is capable of responding to audio and text
524
+ * inputs in realtime over WebRTC or a WebSocket interface.
525
+ * <p>
526
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
527
+ * cutoff: October 1, 2023.
528
+ * <p>
529
+ * Model ID: gpt-4o-mini-realtime-preview
419
530
* <p>
420
- * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
531
+ * See: <a href=
532
+ * "https://platform.openai.com/docs/models/gpt-4o-mini-realtime-preview">gpt-4o-mini-realtime-preview</a>
421
533
*/
422
- GPT_4_1106_PREVIEW ("gpt-4-1106-preview" ),
534
+ GPT_4O_MINI_REALTIME_PREVIEW("gpt-4o-mini-realtime-preview"),
535
+
536
+ // --- Older GPT Models ---
423
537
424
538
/**
425
- * <b>GPT-4 Turbo Preview</b> is a high-intelligence GPT model usable in Chat
426
- * Completions.
539
+ * <b>GPT-4 Turbo</b> is the next generation of GPT-4, an older high-intelligence
540
+ * GPT model. It was designed to be a cheaper, better version of GPT-4. Today, we
541
+ * recommend using a newer model like GPT-4o.
542
+ * <p>
543
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
544
+ * cutoff: December 1, 2023.
427
545
* <p>
428
- * Currently points to {@link #GPT_4_0125_PREVIEW}.
546
+ * Model ID: gpt-4-turbo
429
547
* <p>
430
- * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
548
+ * See:
549
+ * <a href="https://platform.openai.com/docs/models/gpt-4-turbo">gpt-4-turbo</a>
431
550
*/
432
- GPT_4_TURBO_PREVIEW ("gpt-4-turbo-preview " ),
551
+ GPT_4_TURBO ("gpt-4-turbo" ),
433
552
434
553
/**
435
554
* <b>GPT-4</b> is an older version of a high-intelligence GPT model, usable in
436
- * Chat Completions.
437
- * <p>
438
- * Currently points to {@link #GPT_4_0613}.
555
+ * Chat Completions. Vision capabilities may not be available.
439
556
* <p>
440
- * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
441
- */
442
- GPT_4 ("gpt-4" ),
443
- /**
444
- * GPT-4 model snapshot.
557
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
558
+ * cutoff: December 1, 2023.
445
559
* <p>
446
- * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
447
- */
448
- GPT_4_0613 ("gpt-4-0613" ),
449
- /**
450
- * GPT-4 model snapshot.
560
+ * Model ID: gpt-4
451
561
* <p>
452
- * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
562
+ * See: <a href="https://platform.openai.com/docs/models/gpt-4">gpt-4</a>
453
563
*/
454
- GPT_4_0314 ("gpt-4-0314 " ),
564
+ GPT_4 ("gpt-4" ),
455
565
456
566
/**
457
567
* <b>GPT-3.5 Turbo</b> models can understand and generate natural language or
458
568
* code and have been optimized for chat using the Chat Completions API but work
459
- * well for non-chat tasks as well.
460
- * <p>
461
- * As of July 2024, {@link #GPT_4_O_MINI} should be used in place of
462
- * gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
463
- * gpt-3.5-turbo is still available for use in the API.
569
+ * well for non-chat tasks as well. Generally lower cost but less capable than
570
+ * GPT-4 models.
464
571
* <p>
572
+ * As of July 2024, GPT-4o mini is recommended over gpt-3.5-turbo for most use
573
+ * cases.
465
574
* <p>
466
575
* Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
467
576
* cutoff: September, 2021.
577
+ * <p>
578
+ * Model ID: gpt-3.5-turbo
579
+ * <p>
580
+ * See: <a href=
581
+ * "https://platform.openai.com/docs/models/gpt-3.5-turbo">gpt-3.5-turbo</a>
468
582
*/
469
583
GPT_3_5_TURBO ("gpt-3.5-turbo" ),
470
584
0 commit comments