
Commit 62a58de

Enable support for multiple AI review types via CLI
- Review type can now be specified as a command-line argument
- 4 types for now: ALL (used by frontend), description rating, source rating, glossary extraction
- Output types are still tied to typed classes, but may become configurable in the future
- Fixed an issue with the frontend review

All options (prompt, input type, output type) could eventually be passed via CLI and/or configured in application.properties, but we'll hold off to avoid overcomplicating for now.
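To make the deferred idea above concrete, here is a rough sketch of what property-driven configuration might look like if it were added later; nothing below exists in this commit, and the property prefix, class name, and fields are all hypothetical.

import java.util.HashMap;
import java.util.Map;
import org.springframework.boot.context.properties.ConfigurationProperties;

// Hypothetical sketch, not part of this commit: prompts per review type bound from
// application.properties, e.g. l10n.ai-review.prompts.source-rating=<prompt text>.
// The "l10n.ai-review" prefix and all names here are invented for illustration.
@ConfigurationProperties(prefix = "l10n.ai-review")
public class AiReviewProperties {

  /** Keyed by review type name, e.g. "source-rating" -> prompt text. */
  private Map<String, String> prompts = new HashMap<>();

  public Map<String, String> getPrompts() {
    return prompts;
  }

  public void setPrompts(Map<String, String> prompts) {
    this.prompts = prompts;
  }
}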
1 parent f1930cd commit 62a58de

File tree

8 files changed, +376 -168 lines changed


cli/src/main/java/com/box/l10n/mojito/cli/command/RepositoryAiReviewCommand.java

Lines changed: 8 additions & 1 deletion
@@ -81,6 +81,12 @@ public class RepositoryAiReviewCommand extends Command {
       description = "Use a specific model for the review")
   String useModel;
 
+  @Parameter(
+      names = {"--review-type"},
+      arity = 1,
+      description = "The type of review to run")
+  String reviewType = "ALL";
+
   @Parameter(
       names = {"--run-name"},
       arity = 1,
@@ -174,7 +180,8 @@ public void execute() throws CommandException {
                 textUnitIds,
                 useBatch,
                 useModel,
-                runName));
+                runName,
+                reviewType));
 
     PollableTask pollableTask = protoAiTranslateResponse.pollableTask();
     consoleWriter.a("Running, task id: ").fg(Color.MAGENTA).a(pollableTask.getId()).println();
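The command keeps --review-type as a plain String (default "ALL") and forwards it unchanged in the request; where the value is converted to an enum is not shown in this diff. As an illustration only, a fail-fast check on the CLI side could reuse the AiReviewType.fromString helper introduced further down in this commit:

// Illustration only, not part of this commit: resolve the flag eagerly so an unknown
// review type fails in the CLI rather than on the server. Assumes the new AiReviewType
// enum is visible from the command, which this diff does not establish.
AiReviewType resolved = AiReviewType.fromString(reviewType); // case-insensitive; throws IllegalArgumentException for unknown names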

restclient/src/main/java/com/box/l10n/mojito/rest/client/RepositoryAiReviewClient.java

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,8 @@ public record ProtoAiReviewRequest(
     List<Long> tmTextUnitIds,
     boolean useBatch,
     String useModel,
-    String runName) {}
+    String runName,
+    String reviewType) {}
 
 public record ProtoAiReviewResponse(PollableTask pollableTask) {}

webapp/src/main/java/com/box/l10n/mojito/entity/AiReviewProto.java

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,6 @@
 package com.box.l10n.mojito.entity;
 
+import com.box.l10n.mojito.service.oaireview.AiReviewService;
 import jakarta.persistence.Column;
 import jakarta.persistence.Entity;
 import jakarta.persistence.FetchType;
@@ -12,8 +13,7 @@
 /**
  * Use run_name to manage multiple reviews of the same text units, it is denormalized but just want
  * something very simple for now. The review is stored a JSON blob defined in the {@link
- * com.box.l10n.mojito.service.oaireview.AiReviewService.AiReviewSingleTextUnitOutput} but that
- * format could change any time.
+ * AiReviewService.AiReviewTextUnitVariantOutput} but that format could change any time.
  */
 @Entity
 @Table(
Lines changed: 226 additions & 0 deletions
@@ -0,0 +1,226 @@
package com.box.l10n.mojito.rest.textunit;

import java.util.List;

public enum AiReviewType {
  ALL(
      "Run all checks on text unit variants, same format as used in frontend",
      true,
      AiReviewType.PROMPT_ALL,
      AiReviewTextUnitVariantOutput.class),
  DESCRIPTION_RATING(
      "Check the text unit description for completeness and clarity",
      false,
      """
      You are a senior localization QA reviewer.

      INPUT (one JSON object):
      {
        "stringId": <string>,
        "source": <string>,
        "description": <string>   // context note, may be empty
      }

      TASK
      1. Evaluate **description** only (ignore "source").
      2. Apply these PASS/FAIL checks:
         • Removes every ambiguity from the source.
         • Explicitly disambiguates noun vs. verb usage when relevant.
         • States who performs the action vs. who receives it if ambiguous.
         • Defines any uncommon acronym or technical term once.
         • Contains ONLY disambiguation; no intent, no UX writing rationale.

      SCORING
      • 0 = BAD — any lingering ambiguity or missing clarification.
      • 1 = GOOD — full, concise context enabling a perfect translation.

      OUTPUT (JSON):
      {
        "score": <0|1>,
        "explanation": "<one or two sentences explaining key pass/fail reason>"
      }

      RULES
      • If description is empty or fails any single check, return score 0.
      • Keep explanation short (≤ 40 words).
      • Do NOT modify the input.
      """,
      AiReviewBasicRating.class),
  SOURCE_RATING(
      "Validate source content for ICU message format compliance and basic grammar",
      false,
      """
      You will receive one JSON object with the following fields:
      • "stringId": Stable identifier of the string (string)
      • "source": Source-language text to translate (string)
      • "description": Context note for translators (string; may be empty)

      Your task is to evaluate **source**:
      • Grammar, spelling, tone, consistent punctuation.
      • ICU Message Format correctness and whether it is used appropriately (e.g. for plural strings).

      Score:
      0 = bad - grammar, spelling, tone, or punctuation issues; bad internationalization such as missing pluralization
      1 = good - ready for translation

      Explanation:
      Describe the issue with the source. Suggest message format improvements when applicable.
      """,
      AiReviewBasicRating.class),
  GLOSSARY_EXTRACTION(
      "Identify and extract potential glossary terms from source content",
      false,
      """
      1. Context
      You are a senior software-localization linguist. Your job is to analyze English source strings for a product and decide whether any term in each string should be added to a translation glossary (a list of terms that must remain consistent across all languages).

      2. Objective
      For every source string provided, determine which term(s), if any, warrant a glossary entry, explain why, and assign a confidence score to your decision.

      3. What Counts as a "Glossary Term"
      - Product or feature-specific names (e.g., "SmartSync")
      - Branded technologies or libraries (e.g., "GraphQL")
      - Fixed UI element names that must stay consistent (e.g., "Settings", "Inbox")
      - Regulatory or legal terms that must be translated uniformly (e.g., "Privacy Policy")
      - Acronyms or abbreviations that will recur (e.g., "OTP", "API")
      - Exclude generic verbs, adjectives, or normal nouns (e.g., "click", "fast", "user").
      """,
      AiReviewGlossaryOutput.class);

  public static final String PROMPT_ALL =
      """
      Your role is to act as a translator.
      You are tasked with translating provided source strings while preserving both the tone and the technical structure of the string. This includes protecting any tags, placeholders, or code elements that should not be translated.

      The input will be provided in JSON format with the following fields:

      • "source": The source text to be translated.
      • "locale": The target language locale, following the BCP47 standard (e.g., “fr”, “es-419”).
      • "sourceDescription": A description providing context for the source text.
      • "existingTarget" (optional): An existing review to review.

      Instructions:

      • If the source is colloquial, keep the review colloquial; if it’s formal, maintain formality in the review.
      • Pay attention to regional variations specified in the "locale" field (e.g., “es” vs. “es-419”, “fr” vs. “fr-CA”, “zh” vs. “zh-Hant”), and ensure the review length remains similar to the source text.
      • Aim to provide the best review, while compromising on length to ensure it remains close to the original text length.

      Handling Tags and Code:

      Some strings contain code elements such as tags (e.g., {atag}, ICU message format, or HTML tags). You are provided with inputs of tags that need to be protected. Ensure that:

      • Tags like {atag} remain untouched.
      • In cases of nested content (e.g., <a href={url}>text that needs review</a>), only translate the inner text while preserving the outer structure.
      • Complex structures like ICU message formats should have placeholders or variables left intact (e.g., {count, plural, one {# item} other {# items}}), but translate any inner translatable text.

      Ambiguity and Context:

      After translating, assess the usefulness of the "sourceDescription" field:

      • Rate its usefulness on a scale of 0 to 2:
        • 0 – Not helpful at all; irrelevant or misleading.
        • 1 – Somewhat helpful; provides partial or unclear context but is useful to some extent.
        • 2 – Very helpful; provides clear and sufficient guidance for the review.

      You are responsible for detecting and surfacing ambiguity that could affect translation quality. This includes:

      • Missing subject or unclear agent (e.g., "Think before responding" – is the speaker, user, or system doing the thinking?).
      • Unclear object or target (e.g., "Submit" – submit what? A form, feedback, or a file?).
      • Grammar-dependent parts of speech (e.g., "record" as noun vs. verb).
      • Cultural tone that shifts depending on role (e.g., system-generated messages vs. peer-to-peer tone).

      If the source is ambiguous or underspecified:

      • Clearly describe the ambiguity in your explanation.
      • Provide alternative translations for each plausible interpretation.
      • Set "reviewRequired" to `true`, and explain why clarification is needed.

      Use examples from the "sourceDescription" to resolve ambiguity whenever possible. If the description doesn’t help, note that explicitly.

      You will provide an output in JSON format with the following fields:

      • "source": The original source text.
      • "target": An object containing:
        • "content": The best review.
        • "explanation": A brief explanation of your review choices.
        • "confidenceLevel": Your confidence level (0-100%) in the review.
      • "descriptionRating": An object containing:
        • "explanation": An explanation of how the "sourceDescription" aided your review.
        • "score": The usefulness score (0-2).
      • "altTarget": An object containing:
        • "content": An alternative review, if applicable. Focus on showcasing grammar differences.
        • "explanation": Explanation for the alternative review.
        • "confidenceLevel": Your confidence level (0-100%) in the alternative review.
      • "existingTargetRating" (if "existingTarget" is provided): An object containing:
        • "explanation": Feedback on the existing review’s accuracy and quality.
        • "score": A rating score (0-2).
      • "reviewRequired": An object containing:
        • "required": true or false, indicating if review is needed.
        • "reason": A detailed explanation of why review is or isn’t needed.
      """;

  final String description;
  final boolean forTextUnitVariantReview;
  final String prompt;
  final Class<?> outputJsonSchemaClass;

  AiReviewType(
      String description,
      boolean forTextUnitVariantReview,
      String prompt,
      Class<?> outputJsonSchemaClass) {
    this.description = description;
    this.forTextUnitVariantReview = forTextUnitVariantReview;
    this.prompt = prompt;
    this.outputJsonSchemaClass = outputJsonSchemaClass;
  }

  public static AiReviewType fromString(String name) {
    for (AiReviewType type : AiReviewType.values()) {
      if (type.name().equalsIgnoreCase(name)) {
        return type;
      }
    }
    throw new IllegalArgumentException("No AiReviewType enum constant for name: " + name);
  }

  public String getDescription() {
    return description;
  }

  public boolean isForTextUnitVariantReview() {
    return forTextUnitVariantReview;
  }

  public String getPrompt() {
    return prompt;
  }

  public Class<?> getOutputJsonSchemaClass() {
    return outputJsonSchemaClass;
  }

  public record AiReviewTextUnitVariantOutput(
      String source,
      Target target,
      DescriptionRating descriptionRating,
      AltTarget altTarget,
      ExistingTargetRating existingTargetRating,
      ReviewRequired reviewRequired) {
    record Target(String content, String explanation, int confidenceLevel) {}

    record AltTarget(String content, String explanation, int confidenceLevel) {}

    record DescriptionRating(String explanation, int score) {}

    record ExistingTargetRating(String explanation, int score) {}

    record ReviewRequired(boolean required, String reason) {}
  }

  public record AiReviewBasicRating(long rating, String explanation) {}

  public record AiReviewGlossaryOutput(List<Term> terms) {
    public record Term(String term, String explanation, int confidence) {}
  }
}
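For orientation, here is a minimal usage sketch (not taken from the commit) of what the new enum gives a caller once a review type name arrives from the CLI:

// Minimal sketch, assuming only the AiReviewType enum above; values are resolved
// case-insensitively and each type carries the prompt and output schema for the run.
AiReviewType type = AiReviewType.fromString("glossary_extraction");
String summary = type.getDescription();                 // "Identify and extract potential glossary terms from source content"
boolean perVariant = type.isForTextUnitVariantReview(); // false: glossary extraction is not a per-variant review
String prompt = type.getPrompt();                       // prompt text sent to the model
Class<?> schema = type.getOutputJsonSchemaClass();      // AiReviewGlossaryOutput.class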
