Skip to content

Commit 24cb705

Browse files
committed
country and language code mapping
1 parent e9c3021 commit 24cb705

File tree

4 files changed

+508
-24
lines changed

4 files changed

+508
-24
lines changed

web-search/brave/src/conversions.rs

Lines changed: 149 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ use golem_web_search::golem::web_search::types::{
88
pub fn convert_params_to_request(params: &SearchParams, offset: Option<u32>) -> BraveSearchRequest {
99
let mut request = BraveSearchRequest {
1010
q: params.query.clone(),
11-
country: params.region.clone(),
12-
search_lang: params.language.clone(),
13-
ui_lang: params.language.clone(),
11+
country: country_code_to_brave(params.region.as_ref().unwrap_or(&"us".to_string())),
12+
search_lang: language_code_to_brave(params.language.as_ref().unwrap_or(&"en".to_string())),
13+
ui_lang: None,
1414
count: params.max_results,
1515
offset,
1616
safesearch: params.safe_search.as_ref().map(|s| match s {
@@ -54,6 +54,152 @@ pub fn convert_params_to_request(params: &SearchParams, offset: Option<u32>) ->
5454
request
5555
}
5656

57+
/// Normalizes a caller-supplied region into the country code sent to Brave.
///
/// The input is lowercased and looked up in a table of two-letter codes and
/// English country names. Aliases collapse onto one canonical code (for
/// example `"uk"` and `"united kingdom"` both become `"gb"`). Input that is
/// not in the table is passed through in lowercase, so the result is always
/// `Some`.
pub fn country_code_to_brave(country_code: &str) -> Option<String> {
    let normalized = country_code.to_lowercase();

    let canonical = match normalized.as_str() {
        "us" | "usa" | "united states" => "us",
        "uk" | "gb" | "united kingdom" => "gb",
        "ca" | "canada" => "ca",
        "au" | "australia" => "au",
        "de" | "germany" => "de",
        "fr" | "france" => "fr",
        "es" | "spain" => "es",
        "it" | "italy" => "it",
        "jp" | "japan" => "jp",
        "br" | "brazil" => "br",
        "in" | "india" => "in",
        "cn" | "china" => "cn",
        "ru" | "russia" => "ru",
        "mx" | "mexico" => "mx",
        "ar" | "argentina" => "ar",
        "cl" | "chile" => "cl",
        "co" | "colombia" => "co",
        "pe" | "peru" => "pe",
        "za" | "south africa" => "za",
        "ng" | "nigeria" => "ng",
        "eg" | "egypt" => "eg",
        "kr" | "south korea" => "kr",
        "th" | "thailand" => "th",
        "sg" | "singapore" => "sg",
        "my" | "malaysia" => "my",
        "id" | "indonesia" => "id",
        "ph" | "philippines" => "ph",
        "vn" | "vietnam" => "vn",
        "tw" | "taiwan" => "tw",
        "hk" | "hong kong" => "hk",
        "nl" | "netherlands" => "nl",
        "be" | "belgium" => "be",
        "ch" | "switzerland" => "ch",
        "at" | "austria" => "at",
        "se" | "sweden" => "se",
        "no" | "norway" => "no",
        "dk" | "denmark" => "dk",
        "fi" | "finland" => "fi",
        "pl" | "poland" => "pl",
        "cz" | "czech republic" => "cz",
        "hu" | "hungary" => "hu",
        "gr" | "greece" => "gr",
        "pt" | "portugal" => "pt",
        "tr" | "turkey" => "tr",
        "il" | "israel" => "il",
        "ae" | "uae" | "united arab emirates" => "ae",
        "sa" | "saudi arabia" => "sa",
        "nz" | "new zealand" => "nz",
        // Not in the table: forward the lowercased input unchanged.
        unrecognized => unrecognized,
    };

    Some(canonical.to_string())
}
110+
111+
/// Normalizes a caller-supplied language into the language code sent to Brave.
///
/// The input is lowercased and a leading `lang_` prefix, if present, is
/// removed. The remainder is looked up in a table of two-letter codes and
/// English language names; a recognized name collapses to its code. Input
/// that is not in the table is passed through (lowercased, prefix removed),
/// so the result is always `Some`.
pub fn language_code_to_brave(language_code: &str) -> Option<String> {
    let lowered = language_code.to_lowercase();
    // `strip_prefix` yields `None` when the prefix is absent, in which case
    // the whole lowercased input is used as the tag.
    let bare = lowered.strip_prefix("lang_").unwrap_or(&lowered);

    let canonical = match bare {
        "en" | "english" => "en",
        "es" | "spanish" => "es",
        "fr" | "french" => "fr",
        "de" | "german" => "de",
        "it" | "italian" => "it",
        "pt" | "portuguese" => "pt",
        "ru" | "russian" => "ru",
        "zh" | "chinese" => "zh",
        "ja" | "japanese" => "ja",
        "ko" | "korean" => "ko",
        "ar" | "arabic" => "ar",
        "hi" | "hindi" => "hi",
        "th" | "thai" => "th",
        "vi" | "vietnamese" => "vi",
        "id" | "indonesian" => "id",
        "ms" | "malay" => "ms",
        "tl" | "tagalog" => "tl",
        "nl" | "dutch" => "nl",
        "sv" | "swedish" => "sv",
        "no" | "norwegian" => "no",
        "da" | "danish" => "da",
        "fi" | "finnish" => "fi",
        "pl" | "polish" => "pl",
        "cs" | "czech" => "cs",
        "hu" | "hungarian" => "hu",
        "el" | "greek" => "el",
        "tr" | "turkish" => "tr",
        "he" | "hebrew" => "he",
        "fa" | "persian" => "fa",
        "ur" | "urdu" => "ur",
        "bn" | "bengali" => "bn",
        "ta" | "tamil" => "ta",
        "te" | "telugu" => "te",
        "ml" | "malayalam" => "ml",
        "kn" | "kannada" => "kn",
        "gu" | "gujarati" => "gu",
        "pa" | "punjabi" => "pa",
        "mr" | "marathi" => "mr",
        "ne" | "nepali" => "ne",
        "si" | "sinhala" => "si",
        "my" | "myanmar" => "my",
        "km" | "khmer" => "km",
        "lo" | "lao" => "lo",
        "ka" | "georgian" => "ka",
        "hy" | "armenian" => "hy",
        "az" | "azerbaijani" => "az",
        "kk" | "kazakh" => "kk",
        "ky" | "kyrgyz" => "ky",
        "mn" | "mongolian" => "mn",
        "uz" | "uzbek" => "uz",
        "uk" | "ukrainian" => "uk",
        "bg" | "bulgarian" => "bg",
        "hr" | "croatian" => "hr",
        "sr" | "serbian" => "sr",
        "bs" | "bosnian" => "bs",
        "mk" | "macedonian" => "mk",
        "sl" | "slovenian" => "sl",
        "sk" | "slovak" => "sk",
        "ro" | "romanian" => "ro",
        "lv" | "latvian" => "lv",
        "lt" | "lithuanian" => "lt",
        "et" | "estonian" => "et",
        "mt" | "maltese" => "mt",
        "is" | "icelandic" => "is",
        "ga" | "irish" => "ga",
        "cy" | "welsh" => "cy",
        "eu" | "basque" => "eu",
        "ca" | "catalan" => "ca",
        "gl" | "galician" => "gl",
        "af" | "afrikaans" => "af",
        "sw" | "swahili" => "sw",
        "am" | "amharic" => "am",
        "or" | "oriya" => "or",
        "as" | "assamese" => "as",
        "sd" | "sindhi" => "sd",
        "ps" | "pashto" => "ps",
        "tg" | "tajik" => "tg",
        "tk" | "turkmen" => "tk",
        // Not in the table: forward the lowercased, prefix-free tag as is.
        unrecognized => unrecognized,
    };

    Some(canonical.to_string())
}
202+
57203
pub fn convert_response_to_results(
58204
response: BraveSearchResponse,
59205
params: &SearchParams,

web-search/google/src/conversions.rs

Lines changed: 147 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,8 @@ pub fn convert_params_to_request(
2020
SafeSearchLevel::Medium => "medium".to_string(),
2121
SafeSearchLevel::High => "high".to_string(),
2222
}),
23-
lr: params
24-
.language
25-
.as_ref()
26-
.map(|lang| format!("lang_{}", lang)),
27-
gl: params.region.clone(),
23+
lr: language_code_to_google(params.language.as_ref().unwrap_or(&"en".to_string())),
24+
gl: country_code_to_google(params.region.as_ref().unwrap_or(&"us".to_string())),
2825
date_restrict: params.time_range.as_ref().map(|tr| match tr {
2926
TimeRange::Day => "d1".to_string(),
3027
TimeRange::Week => "w1".to_string(),
@@ -62,6 +59,151 @@ pub fn convert_params_to_request(
6259
request
6360
}
6461

62+
/// Normalizes a caller-supplied region into the value used for Google's `gl`
/// request field.
///
/// The input is lowercased and looked up in a table of two-letter codes and
/// English country names. Aliases collapse onto one canonical code; here the
/// United Kingdom maps to `"uk"` (both `"gb"` and `"united kingdom"` become
/// `"uk"`). Input that is not in the table is passed through in lowercase, so
/// the result is always `Some`.
pub fn country_code_to_google(country_code: &str) -> Option<String> {
    let normalized = country_code.to_lowercase();

    let canonical = match normalized.as_str() {
        "us" | "usa" | "united states" => "us",
        "uk" | "gb" | "united kingdom" => "uk",
        "ca" | "canada" => "ca",
        "au" | "australia" => "au",
        "de" | "germany" => "de",
        "fr" | "france" => "fr",
        "es" | "spain" => "es",
        "it" | "italy" => "it",
        "jp" | "japan" => "jp",
        "br" | "brazil" => "br",
        "in" | "india" => "in",
        "cn" | "china" => "cn",
        "ru" | "russia" => "ru",
        "mx" | "mexico" => "mx",
        "ar" | "argentina" => "ar",
        "cl" | "chile" => "cl",
        "co" | "colombia" => "co",
        "pe" | "peru" => "pe",
        "za" | "south africa" => "za",
        "ng" | "nigeria" => "ng",
        "eg" | "egypt" => "eg",
        "kr" | "south korea" => "kr",
        "th" | "thailand" => "th",
        "sg" | "singapore" => "sg",
        "my" | "malaysia" => "my",
        "id" | "indonesia" => "id",
        "ph" | "philippines" => "ph",
        "vn" | "vietnam" => "vn",
        "tw" | "taiwan" => "tw",
        "hk" | "hong kong" => "hk",
        "nl" | "netherlands" => "nl",
        "be" | "belgium" => "be",
        "ch" | "switzerland" => "ch",
        "at" | "austria" => "at",
        "se" | "sweden" => "se",
        "no" | "norway" => "no",
        "dk" | "denmark" => "dk",
        "fi" | "finland" => "fi",
        "pl" | "poland" => "pl",
        "cz" | "czech republic" => "cz",
        "hu" | "hungary" => "hu",
        "gr" | "greece" => "gr",
        "pt" | "portugal" => "pt",
        "tr" | "turkey" => "tr",
        "il" | "israel" => "il",
        "ae" | "uae" | "united arab emirates" => "ae",
        "sa" | "saudi arabia" => "sa",
        "nz" | "new zealand" => "nz",
        // Not in the table: forward the lowercased input unchanged.
        unrecognized => unrecognized,
    };

    Some(canonical.to_string())
}
115+
116+
/// Normalizes a caller-supplied language into the `lang_<code>` form used for
/// Google's `lr` request field.
///
/// Accepted inputs, matched case-insensitively:
/// - a two-letter code (`"en"`) or English language name (`"english"`);
/// - either of those already carrying a `lang_` prefix (`"LANG_EN"`,
///   `"lang_english"`).
///
/// The prefix is always emitted in lowercase and recognized names collapse to
/// their code, so `"lang_english"`, `"LANG_EN"` and `"English"` all produce
/// `"lang_en"`. Tags that are not in the table are still forwarded:
/// - without a prefix they are lowercased (`"XX"` -> `"lang_xx"`);
/// - with a prefix the caller's casing of the tag is kept
///   (`"lang_zh-TW"` -> `"lang_zh-TW"`), since the caller already used the
///   provider's own format.
///
/// The result is always `Some`.
///
/// NOTE(review): Google's published `lr` values appear to use `lang_iw` for
/// Hebrew and `lang_zh-CN` / `lang_zh-TW` for Chinese; confirm whether the
/// `he` and `zh` entries below are accepted by the API.
pub fn language_code_to_google(language_code: &str) -> Option<String> {
    const PREFIX: &str = "lang_";

    // Split off an existing prefix without assuming its case. `get` returns
    // `None` for inputs shorter than the prefix or when the cut would land
    // inside a multi-byte character, so the slice below cannot panic.
    let (has_prefix, tag) = match language_code.get(..PREFIX.len()) {
        Some(head) if head.eq_ignore_ascii_case(PREFIX) => {
            (true, &language_code[PREFIX.len()..])
        }
        _ => (false, language_code),
    };

    let lowered = tag.to_lowercase();

    let canonical = match lowered.as_str() {
        "en" | "english" => "en",
        "es" | "spanish" => "es",
        "fr" | "french" => "fr",
        "de" | "german" => "de",
        "it" | "italian" => "it",
        "pt" | "portuguese" => "pt",
        "ru" | "russian" => "ru",
        "zh" | "chinese" => "zh",
        "ja" | "japanese" => "ja",
        "ko" | "korean" => "ko",
        "ar" | "arabic" => "ar",
        "hi" | "hindi" => "hi",
        "th" | "thai" => "th",
        "vi" | "vietnamese" => "vi",
        "id" | "indonesian" => "id",
        "ms" | "malay" => "ms",
        "tl" | "tagalog" => "tl",
        "nl" | "dutch" => "nl",
        "sv" | "swedish" => "sv",
        "no" | "norwegian" => "no",
        "da" | "danish" => "da",
        "fi" | "finnish" => "fi",
        "pl" | "polish" => "pl",
        "cs" | "czech" => "cs",
        "hu" | "hungarian" => "hu",
        "el" | "greek" => "el",
        "tr" | "turkish" => "tr",
        "he" | "hebrew" => "he",
        "fa" | "persian" => "fa",
        "ur" | "urdu" => "ur",
        "bn" | "bengali" => "bn",
        "ta" | "tamil" => "ta",
        "te" | "telugu" => "te",
        "ml" | "malayalam" => "ml",
        "kn" | "kannada" => "kn",
        "gu" | "gujarati" => "gu",
        "pa" | "punjabi" => "pa",
        "mr" | "marathi" => "mr",
        "ne" | "nepali" => "ne",
        "si" | "sinhala" => "si",
        "my" | "myanmar" => "my",
        "km" | "khmer" => "km",
        "lo" | "lao" => "lo",
        "ka" | "georgian" => "ka",
        "hy" | "armenian" => "hy",
        "az" | "azerbaijani" => "az",
        "kk" | "kazakh" => "kk",
        "ky" | "kyrgyz" => "ky",
        "mn" | "mongolian" => "mn",
        "uz" | "uzbek" => "uz",
        "uk" | "ukrainian" => "uk",
        "bg" | "bulgarian" => "bg",
        "hr" | "croatian" => "hr",
        "sr" | "serbian" => "sr",
        "bs" | "bosnian" => "bs",
        "mk" | "macedonian" => "mk",
        "sl" | "slovenian" => "sl",
        "sk" | "slovak" => "sk",
        "ro" | "romanian" => "ro",
        "lv" | "latvian" => "lv",
        "lt" | "lithuanian" => "lt",
        "et" | "estonian" => "et",
        "mt" | "maltese" => "mt",
        "is" | "icelandic" => "is",
        "ga" | "irish" => "ga",
        "cy" | "welsh" => "cy",
        "eu" | "basque" => "eu",
        "ca" | "catalan" => "ca",
        "gl" | "galician" => "gl",
        "af" | "afrikaans" => "af",
        "sw" | "swahili" => "sw",
        "am" | "amharic" => "am",
        "or" | "oriya" => "or",
        "as" | "assamese" => "as",
        "sd" | "sindhi" => "sd",
        "ps" | "pashto" => "ps",
        "tg" | "tajik" => "tg",
        "tk" | "turkmen" => "tk",
        // Unknown tag that arrived pre-formatted: keep the caller's casing so
        // region-qualified values such as `zh-TW` are not altered.
        _ if has_prefix => tag,
        // Unknown bare tag: forward it lowercased.
        _ => lowered.as_str(),
    };

    Some(format!("{PREFIX}{canonical}"))
}
206+
65207
pub fn convert_response_to_results(
66208
response: GoogleSearchResponse,
67209
params: &SearchParams,

0 commit comments

Comments
 (0)