diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5d664e723..a8ba3e87d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -137,7 +137,7 @@ jobs: - name: Build and test Ollama integration run: | set -e - cargo make --cwd llm build-ollama + cargo make build cd test golem-cli app build -b ollama-debug golem-cli app deploy -b ollama-debug diff --git a/Cargo.lock b/Cargo.lock index 0865d6ade..52efe4a7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -457,6 +457,76 @@ dependencies = [ "wit-bindgen-rt 0.40.0", ] +[[package]] +name = "golem-web-search" +version = "0.0.0" +dependencies = [ + "golem-rust", + "log", + "mime", + "nom", + "reqwest", + "thiserror", + "wasi-logger", + "wit-bindgen 0.40.0", +] + +[[package]] +name = "golem-websearch-brave" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt 0.40.0", +] + +[[package]] +name = "golem-websearch-google" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt 0.40.0", +] + +[[package]] +name = "golem-websearch-serper" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt 0.40.0", +] + +[[package]] +name = "golem-websearch-tavily" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt 0.40.0", +] + [[package]] name = "hashbrown" version = "0.15.3" diff --git a/Cargo.toml b/Cargo.toml index 7bea1e1e5..340ff471c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,11 @@ members = [ "llm/ollama", "llm/openai", "llm/openrouter", + "web-search/web-search", + "web-search/brave", + "web-search/google", + "web-search/serper", + "web-search/tavily", ] [profile.release] 
diff --git a/Makefile.toml b/Makefile.toml index cc443bc6a..7042e16b9 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -13,7 +13,7 @@ args = ["test"] [tasks.build] script_runner = "@duckscript" script = ''' -domains = array llm +domains = array llm web-search # if there is no domain passed run for every domain if is_empty ${1} @@ -28,7 +28,7 @@ end [tasks.release-build] script_runner = "@duckscript" script = ''' -domains = array llm +domains = array llm web-search # if there is no domain passed run for every domain if is_empty ${1} @@ -44,7 +44,7 @@ end script_runner = "@duckscript" script = ''' #!/bin/bash -domains = array llm +domains = array llm web-search # if there is no domain passed run for every domain if is_empty ${1} @@ -60,7 +60,7 @@ end script_runner = "@duckscript" script = ''' #!/bin/bash -domains = array llm +domains = array llm web-search # if there is no domain passed run for every domain if is_empty ${1} @@ -75,7 +75,7 @@ end [tasks.wit] script_runner = "@duckscript" script = ''' -domains = array llm +domains = array llm web-search # if there is no domain passed run for every domain if is_empty ${1} @@ -91,7 +91,7 @@ end description = "Builds all test components with golem-cli" script_runner = "@duckscript" script = ''' -domains = array llm +domains = array llm web-search # if there is no domain passed run for every domain if is_empty ${1} @@ -137,7 +137,7 @@ script = ''' is_portable = eq ${1} "--portable" -targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama +targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama websearch_google websearch_brave websearch_tavily websearch_serper for target in ${targets} if is_portable cp target/wasm32-wasip1/debug/golem_${target}.wasm components/debug/golem_${target}-portable.wasm @@ -153,7 +153,7 @@ script = ''' is_portable = eq ${1} "--portable" -targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama +targets = array llm_openai llm_anthropic 
llm_grok llm_openrouter llm_ollama websearch_google websearch_brave websearch_tavily websearch_serper for target in ${targets} if is_portable cp target/wasm32-wasip1/release/golem_${target}.wasm components/release/golem_${target}-portable.wasm diff --git a/llm/anthropic/src/bindings.rs b/llm/anthropic/src/bindings.rs index 70c5f1fd5..1a54d6167 100644 --- a/llm/anthropic/src/bindings.rs +++ b/llm/anthropic/src/bindings.rs @@ -1,12 +1,15 @@ -// Generated by `wit-bindgen` 0.36.0. DO NOT EDIT! +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! // Options used: // * runtime_path: "wit_bindgen_rt" // * with "golem:llm/llm@1.0.0" = "golem_llm::golem::llm::llm" // * generate_unused_types use golem_llm::golem::llm::llm as __with_name0; #[cfg(target_arch = "wasm32")] -#[link_section = "component-type:wit-bindgen:0.36.0:golem:llm-anthropic@1.0.0:llm-library:encoded world"] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:llm-anthropic@1.0.0:llm-library:encoded world" +)] #[doc(hidden)] +#[allow(clippy::octal_escapes)] pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1762] = *b"\ \0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xe0\x0c\x01A\x02\x01\ A\x02\x01BO\x01m\x04\x04user\x09assistant\x06system\x04tool\x04\0\x04role\x03\0\0\ @@ -43,8 +46,8 @@ ng-get-next\x01B\x01p\x15\x01@\x02\x08messages\xc3\0\x06config)\06\x04\0\x04send \0\x06config)\06\x04\0\x08continue\x01G\x01i=\x01@\x02\x08messages\xc3\0\x06conf\ ig)\0\xc8\0\x04\0\x06stream\x01I\x04\0\x13golem:llm/llm@1.0.0\x05\0\x04\0%golem:\ llm-anthropic/llm-library@1.0.0\x04\0\x0b\x11\x01\0\x0bllm-library\x03\0\0\0G\x09\ -producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.220.0\x10wit-bindgen-rus\ -t\x060.36.0"; +producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\x10wit-bindgen-rus\ +t\x060.41.0"; #[inline(never)] #[doc(hidden)] pub fn __link_custom_section_describing_imports() { diff --git a/llm/anthropic/src/conversions.rs b/llm/anthropic/src/conversions.rs index 
e332f1391..e7d3175a0 100644 --- a/llm/anthropic/src/conversions.rs +++ b/llm/anthropic/src/conversions.rs @@ -130,7 +130,7 @@ pub fn process_response(response: MessagesResponse) -> ChatEvent { Err(e) => { return ChatEvent::Error(Error { code: ErrorCode::InvalidRequest, - message: format!("Failed to decode base64 image data: {}", e), + message: format!("Failed to decode base64 image data: {e}"), provider_error_json: None, }); } diff --git a/llm/grok/src/bindings.rs b/llm/grok/src/bindings.rs index 2a101583e..c2f601347 100644 --- a/llm/grok/src/bindings.rs +++ b/llm/grok/src/bindings.rs @@ -1,12 +1,15 @@ -// Generated by `wit-bindgen` 0.36.0. DO NOT EDIT! +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! // Options used: // * runtime_path: "wit_bindgen_rt" // * with "golem:llm/llm@1.0.0" = "golem_llm::golem::llm::llm" // * generate_unused_types use golem_llm::golem::llm::llm as __with_name0; #[cfg(target_arch = "wasm32")] -#[link_section = "component-type:wit-bindgen:0.36.0:golem:llm-grok@1.0.0:llm-library:encoded world"] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:llm-grok@1.0.0:llm-library:encoded world" +)] #[doc(hidden)] +#[allow(clippy::octal_escapes)] pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1757] = *b"\ \0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xdb\x0c\x01A\x02\x01\ A\x02\x01BO\x01m\x04\x04user\x09assistant\x06system\x04tool\x04\0\x04role\x03\0\0\ @@ -43,8 +46,8 @@ ng-get-next\x01B\x01p\x15\x01@\x02\x08messages\xc3\0\x06config)\06\x04\0\x04send \0\x06config)\06\x04\0\x08continue\x01G\x01i=\x01@\x02\x08messages\xc3\0\x06conf\ ig)\0\xc8\0\x04\0\x06stream\x01I\x04\0\x13golem:llm/llm@1.0.0\x05\0\x04\0\x20gol\ em:llm-grok/llm-library@1.0.0\x04\0\x0b\x11\x01\0\x0bllm-library\x03\0\0\0G\x09p\ -roducers\x01\x0cprocessed-by\x02\x0dwit-component\x070.220.0\x10wit-bindgen-rust\ -\x060.36.0"; +roducers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\x10wit-bindgen-rust\ +\x060.41.0"; #[inline(never)] #[doc(hidden)] pub 
fn __link_custom_section_describing_imports() { diff --git a/llm/grok/src/conversions.rs b/llm/grok/src/conversions.rs index 68a5d570c..129c128ad 100644 --- a/llm/grok/src/conversions.rs +++ b/llm/grok/src/conversions.rs @@ -183,7 +183,7 @@ fn convert_content_parts(contents: Vec) -> crate::client::Content { let media_type = &image_source.mime_type; // This is already a string result.push(crate::client::ContentPart::ImageInput { image_url: crate::client::ImageUrl { - url: format!("data:{};base64,{}", media_type, base64_data), + url: format!("data:{media_type};base64,{base64_data}"), detail: image_source.detail.map(|d| d.into()), }, }); diff --git a/llm/llm/src/event_source/ndjson_stream.rs b/llm/llm/src/event_source/ndjson_stream.rs index e2f4cc1b2..1b8ef3773 100644 --- a/llm/llm/src/event_source/ndjson_stream.rs +++ b/llm/llm/src/event_source/ndjson_stream.rs @@ -126,7 +126,7 @@ fn try_parse_line( return Ok(None); } - trace!("Parsed NDJSON line: {}", line); + trace!("Parsed NDJSON line: {line}"); // Create a MessageEvent with the JSON line as data let event = MessageEvent { diff --git a/llm/llm/src/event_source/stream.rs b/llm/llm/src/event_source/stream.rs index 8f2933676..13a5eeb56 100644 --- a/llm/llm/src/event_source/stream.rs +++ b/llm/llm/src/event_source/stream.rs @@ -56,9 +56,9 @@ where { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Utf8(err) => f.write_fmt(format_args!("UTF8 error: {}", err)), - Self::Parser(err) => f.write_fmt(format_args!("Parse error: {}", err)), - Self::Transport(err) => f.write_fmt(format_args!("Transport error: {}", err)), + Self::Utf8(err) => f.write_fmt(format_args!("UTF8 error: {err}")), + Self::Parser(err) => f.write_fmt(format_args!("Parse error: {err}")), + Self::Transport(err) => f.write_fmt(format_args!("Transport error: {err}")), } } } diff --git a/llm/ollama/src/bindings.rs b/llm/ollama/src/bindings.rs index dbb704704..269cd07fb 100644 --- a/llm/ollama/src/bindings.rs +++ 
b/llm/ollama/src/bindings.rs @@ -1,12 +1,15 @@ -// Generated by `wit-bindgen` 0.36.0. DO NOT EDIT! +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! // Options used: // * runtime_path: "wit_bindgen_rt" // * with "golem:llm/llm@1.0.0" = "golem_llm::golem::llm::llm" // * generate_unused_types use golem_llm::golem::llm::llm as __with_name0; #[cfg(target_arch = "wasm32")] -#[link_section = "component-type:wit-bindgen:0.36.0:golem:llm-ollama@1.0.0:llm-library:encoded world"] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:llm-ollama@1.0.0:llm-library:encoded world" +)] #[doc(hidden)] +#[allow(clippy::octal_escapes)] pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1759] = *b"\ \0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xdd\x0c\x01A\x02\x01\ A\x02\x01BO\x01m\x04\x04user\x09assistant\x06system\x04tool\x04\0\x04role\x03\0\0\ @@ -43,8 +46,8 @@ ng-get-next\x01B\x01p\x15\x01@\x02\x08messages\xc3\0\x06config)\06\x04\0\x04send \0\x06config)\06\x04\0\x08continue\x01G\x01i=\x01@\x02\x08messages\xc3\0\x06conf\ ig)\0\xc8\0\x04\0\x06stream\x01I\x04\0\x13golem:llm/llm@1.0.0\x05\0\x04\0\"golem\ :llm-ollama/llm-library@1.0.0\x04\0\x0b\x11\x01\0\x0bllm-library\x03\0\0\0G\x09p\ -roducers\x01\x0cprocessed-by\x02\x0dwit-component\x070.220.0\x10wit-bindgen-rust\ -\x060.36.0"; +roducers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\x10wit-bindgen-rust\ +\x060.41.0"; #[inline(never)] #[doc(hidden)] pub fn __link_custom_section_describing_imports() { diff --git a/llm/ollama/src/client.rs b/llm/ollama/src/client.rs index e9514a8dd..e2901e70c 100644 --- a/llm/ollama/src/client.rs +++ b/llm/ollama/src/client.rs @@ -335,7 +335,7 @@ pub fn image_to_base64(source: &str) -> Result Error { Error { code: ErrorCode::InternalError, - message: format!("{}: {}", context, err), + message: format!("{context}: {err}"), provider_error_json: None, } } diff --git a/llm/ollama/src/conversions.rs b/llm/ollama/src/conversions.rs index b1db65c61..8d64e954f 100644 --- 
a/llm/ollama/src/conversions.rs +++ b/llm/ollama/src/conversions.rs @@ -214,7 +214,7 @@ pub fn process_response(response: CompletionsResponse) -> ChatEvent { }; ChatEvent::Message(CompleteResponse { - id: format!("ollama-{}", timestamp), + id: format!("ollama-{timestamp}"), content, tool_calls, metadata, diff --git a/llm/openai/src/bindings.rs b/llm/openai/src/bindings.rs index c960248a8..6d0a77280 100644 --- a/llm/openai/src/bindings.rs +++ b/llm/openai/src/bindings.rs @@ -1,12 +1,15 @@ -// Generated by `wit-bindgen` 0.36.0. DO NOT EDIT! +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! // Options used: // * runtime_path: "wit_bindgen_rt" // * with "golem:llm/llm@1.0.0" = "golem_llm::golem::llm::llm" // * generate_unused_types use golem_llm::golem::llm::llm as __with_name0; #[cfg(target_arch = "wasm32")] -#[link_section = "component-type:wit-bindgen:0.36.0:golem:llm-openai@1.0.0:llm-library:encoded world"] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:llm-openai@1.0.0:llm-library:encoded world" +)] #[doc(hidden)] +#[allow(clippy::octal_escapes)] pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1759] = *b"\ \0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xdd\x0c\x01A\x02\x01\ A\x02\x01BO\x01m\x04\x04user\x09assistant\x06system\x04tool\x04\0\x04role\x03\0\0\ @@ -43,8 +46,8 @@ ng-get-next\x01B\x01p\x15\x01@\x02\x08messages\xc3\0\x06config)\06\x04\0\x04send \0\x06config)\06\x04\0\x08continue\x01G\x01i=\x01@\x02\x08messages\xc3\0\x06conf\ ig)\0\xc8\0\x04\0\x06stream\x01I\x04\0\x13golem:llm/llm@1.0.0\x05\0\x04\0\"golem\ :llm-openai/llm-library@1.0.0\x04\0\x0b\x11\x01\0\x0bllm-library\x03\0\0\0G\x09p\ -roducers\x01\x0cprocessed-by\x02\x0dwit-component\x070.220.0\x10wit-bindgen-rust\ -\x060.36.0"; +roducers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\x10wit-bindgen-rust\ +\x060.41.0"; #[inline(never)] #[doc(hidden)] pub fn __link_custom_section_describing_imports() { diff --git a/llm/openai/src/conversions.rs 
b/llm/openai/src/conversions.rs index 43694c0f3..a4989b0c1 100644 --- a/llm/openai/src/conversions.rs +++ b/llm/openai/src/conversions.rs @@ -138,7 +138,7 @@ pub fn content_part_to_inner_input_item(content_part: ContentPart) -> InnerInput ImageReference::Inline(image_source) => { let base64_data = general_purpose::STANDARD.encode(&image_source.data); let mime_type = &image_source.mime_type; // This is already a string - let data_url = format!("data:{};base64,{}", mime_type, base64_data); + let data_url = format!("data:{mime_type};base64,{base64_data}"); InnerInputItem::ImageInput { image_url: data_url, diff --git a/llm/openrouter/src/bindings.rs b/llm/openrouter/src/bindings.rs index ba2accf7e..1300cde97 100644 --- a/llm/openrouter/src/bindings.rs +++ b/llm/openrouter/src/bindings.rs @@ -1,12 +1,15 @@ -// Generated by `wit-bindgen` 0.36.0. DO NOT EDIT! +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! // Options used: // * runtime_path: "wit_bindgen_rt" // * with "golem:llm/llm@1.0.0" = "golem_llm::golem::llm::llm" // * generate_unused_types use golem_llm::golem::llm::llm as __with_name0; #[cfg(target_arch = "wasm32")] -#[link_section = "component-type:wit-bindgen:0.36.0:golem:llm-openrouter@1.0.0:llm-library:encoded world"] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:llm-openrouter@1.0.0:llm-library:encoded world" +)] #[doc(hidden)] +#[allow(clippy::octal_escapes)] pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1763] = *b"\ \0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xe1\x0c\x01A\x02\x01\ A\x02\x01BO\x01m\x04\x04user\x09assistant\x06system\x04tool\x04\0\x04role\x03\0\0\ @@ -43,8 +46,8 @@ ng-get-next\x01B\x01p\x15\x01@\x02\x08messages\xc3\0\x06config)\06\x04\0\x04send \0\x06config)\06\x04\0\x08continue\x01G\x01i=\x01@\x02\x08messages\xc3\0\x06conf\ ig)\0\xc8\0\x04\0\x06stream\x01I\x04\0\x13golem:llm/llm@1.0.0\x05\0\x04\0&golem:\ llm-openrouter/llm-library@1.0.0\x04\0\x0b\x11\x01\0\x0bllm-library\x03\0\0\0G\x09\ 
-producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.220.0\x10wit-bindgen-rus\ -t\x060.36.0"; +producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\x10wit-bindgen-rus\ +t\x060.41.0"; #[inline(never)] #[doc(hidden)] pub fn __link_custom_section_describing_imports() { diff --git a/llm/openrouter/src/conversions.rs b/llm/openrouter/src/conversions.rs index d4db2d34c..61b5f973b 100644 --- a/llm/openrouter/src/conversions.rs +++ b/llm/openrouter/src/conversions.rs @@ -184,7 +184,7 @@ fn convert_content_parts(contents: Vec) -> crate::client::Content { let media_type = &image_source.mime_type; // This is already a string result.push(crate::client::ContentPart::ImageInput { image_url: crate::client::ImageUrl { - url: format!("data:{};base64,{}", media_type, base64_data), + url: format!("data:{media_type};base64,{base64_data}"), detail: image_source.detail.map(|d| d.into()), }, }); diff --git a/test/Cargo.lock b/test/Cargo.lock index 22a6de76b..9a670a506 100644 --- a/test/Cargo.lock +++ b/test/Cargo.lock @@ -886,6 +886,18 @@ dependencies = [ "wit-bindgen-rt 0.40.0", ] +[[package]] +name = "test_web_search" +version = "0.0.0" +dependencies = [ + "golem-rust", + "log", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt 0.40.0", +] + [[package]] name = "thiserror" version = "2.0.12" diff --git a/test/components-rust/test-web-search/Cargo.lock b/test/components-rust/test-web-search/Cargo.lock new file mode 100644 index 000000000..bc5f25f2e --- /dev/null +++ b/test/components-rust/test-web-search/Cargo.lock @@ -0,0 +1,1376 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytes" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" + +[[package]] +name = "cc" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" +dependencies = [ + "shlex", +] + +[[package]] 
+name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "test-openai" +version = "0.0.0" +dependencies = [ + "golem-rust", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fnv" +version = "1.0.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + 
"pin-utils", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "golem-rust" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c967eb388fb81f9b9f4df5d5b6634de803f21cd410c1bf687202794a4fbc0267" +dependencies = [ + "golem-rust-macro", + "serde", + "serde_json", + "uuid", + "wit-bindgen-rt", +] + +[[package]] +name = "golem-rust-macro" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bb87f831cfe4371427c63f5f4cabcc3bae1b66974c8fbcf22be9274fee3a7d1" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" 
+dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", 
+ "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + 
"icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "js-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + 
+[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.52.0", +] + +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "openssl" +version = "0.10.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = 
"pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "reqwest" +version = "0.11.18" +source = "git+https://github.com/zivergetech/reqwest?branch=update-jun-2024#1cf59c67b93aa6292961f8948b93df5bca2753b6" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", + "wit-bindgen-rt", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustix" +version = "0.38.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + 
+[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.216" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.216" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.134" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "syn" +version = "2.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tempfile" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = 
[ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +dependencies = [ + "getrandom", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.99" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" + +[[package]] +name = "web-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = 
"0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c7526379ace8709ee9ab9f2bb50f112d95581063a59ef3097d9c10153886c9" +dependencies = [ + "bitflags", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/test/components-rust/test-web-search/Cargo.toml b/test/components-rust/test-web-search/Cargo.toml new file mode 100644 index 000000000..061c8cf86 --- /dev/null +++ b/test/components-rust/test-web-search/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "test_web_search" +version = "0.0.0" +edition = "2021" + +[lib] +path = "src/lib.rs" +crate-type = ["cdylib"] +required-features = [] + +[features] +default = ["google"] +google = [] +brave = [] +tavily = [] +serper = [] + +[dependencies] +# To use common shared libs, use the following: +# common-lib = { path = "../../common-rust/common-lib" } + +golem-rust = { workspace = true } +log = { version = "0.4.27" } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } + +[package.metadata.component.target] +path = "wit-generated" + +[package.metadata.component.bindings.with] +"wasi:io/poll@0.2.0" = "golem_rust::wasm_rpc::wasi::io::poll" +"wasi:clocks/wall-clock@0.2.0" 
= "golem_rust::wasm_rpc::wasi::clocks::wall_clock" +"golem:rpc/types@0.2.0" = "golem_rust::wasm_rpc::golem_rpc_0_2_x::types" + +[package.metadata.component.target.dependencies] +"wasi:clocks" = { path = "wit-generated/deps/clocks" } +"wasi:io" = { path = "wit-generated/deps/io" } +"golem:rpc" = { path = "wit-generated/deps/golem-rpc" } +"golem:web-search" = { path = "wit-generated/deps/golem-web-search" } +"test:helper-client" = { path = "wit-generated/deps/test_helper-client" } +"test:web-search-exports" = { path = "wit-generated/deps/test_web-search-exports" } + +[package.metadata.component.bindings] +# See https://github.com/bytecodealliance/cargo-component/blob/main/src/metadata.rs#L62 + +# derives = ["serde::Serialize", "serde::Deserialize"] +# generate_unused_types = true \ No newline at end of file diff --git a/test/components-rust/test-web-search/golem.yaml b/test/components-rust/test-web-search/golem.yaml new file mode 100644 index 000000000..fb855a758 --- /dev/null +++ b/test/components-rust/test-web-search/golem.yaml @@ -0,0 +1,177 @@ +# Schema for IDEA: +# $schema: https://schema.golem.cloud/app/golem/1.1.1/golem.schema.json +# Schema for vscode-yaml +# yaml-language-server: $schema=https://schema.golem.cloud/app/golem/1.1.1/golem.schema.json + +# See https://learn.golem.cloud/docs/app-manifest#field-reference for field reference + +components: + test:web-search: + profiles: + # DEBUG PROFILES + google-debug: + build: + - command: cargo component build --no-default-features --features google + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_websearch_google.wasm ../../target/wasm32-wasip1/debug/test_web_search.wasm -o ../../target/wasm32-wasip1/debug/test_google_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - 
../../../target/wasm32-wasip1/debug/golem_websearch_google.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_google_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_google_plugged.wasm + linkedWasm: ../../golem-temp/components/test_google_debug.wasm + clean: + - src/bindings.rs + + # BRAVE DEBUG PROFILE + brave-debug: + build: + - command: cargo component build --no-default-features --features brave + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_websearch_brave.wasm ../../target/wasm32-wasip1/debug/test_web_search.wasm -o ../../target/wasm32-wasip1/debug/test_brave_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - ../../../target/wasm32-wasip1/debug/golem_websearch_brave.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_brave_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_brave_plugged.wasm + linkedWasm: ../../golem-temp/components/test_brave_debug.wasm + clean: + - src/bindings.rs + + # BRAVE RELEASE PROFILE + brave-release: + build: + - command: cargo component build --release --no-default-features --features brave + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/release/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_websearch_brave.wasm ../../target/wasm32-wasip1/release/test_web_search.wasm -o ../../target/wasm32-wasip1/release/test_brave_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_web_search.wasm + - ../../../target/wasm32-wasip1/release/golem_websearch_brave.wasm + targets: + - ../../target/wasm32-wasip1/release/test_brave_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: 
../../target/wasm32-wasip1/release/test_brave_plugged.wasm + linkedWasm: ../../golem-temp/components/test_brave_release.wasm + clean: + - src/bindings.rs + + # TAVILY DEBUG PROFILE + tavily-debug: + build: + - command: cargo component build --no-default-features --features tavily + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_websearch_tavily.wasm ../../target/wasm32-wasip1/debug/test_web_search.wasm -o ../../target/wasm32-wasip1/debug/test_tavily_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - ../../../target/wasm32-wasip1/debug/golem_websearch_tavily.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_tavily_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_tavily_plugged.wasm + linkedWasm: ../../golem-temp/components/test_tavily_debug.wasm + clean: + - src/bindings.rs + + # TAVILY RELEASE PROFILE + tavily-release: + build: + - command: cargo component build --release --no-default-features --features tavily + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/release/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_websearch_tavily.wasm ../../target/wasm32-wasip1/release/test_web_search.wasm -o ../../target/wasm32-wasip1/release/test_tavily_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_web_search.wasm + - ../../../target/wasm32-wasip1/release/golem_websearch_tavily.wasm + targets: + - ../../target/wasm32-wasip1/release/test_tavily_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/release/test_tavily_plugged.wasm + linkedWasm: ../../golem-temp/components/test_tavily_release.wasm + clean: + - src/bindings.rs + + # SERPER DEBUG PROFILE + 
serper-debug: + build: + - command: cargo component build --no-default-features --features serper + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_websearch_serper.wasm ../../target/wasm32-wasip1/debug/test_web_search.wasm -o ../../target/wasm32-wasip1/debug/test_serper_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_web_search.wasm + - ../../../target/wasm32-wasip1/debug/golem_websearch_serper.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_serper_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_serper_plugged.wasm + linkedWasm: ../../golem-temp/components/test_serper_debug.wasm + clean: + - src/bindings.rs + + # SERPER RELEASE PROFILE + serper-release: + build: + - command: cargo component build --release --no-default-features --features serper + sources: + - src + - wit-generated + - ../../common-rust + targets: + - ../../target/wasm32-wasip1/release/test_web_search.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_websearch_serper.wasm ../../target/wasm32-wasip1/release/test_web_search.wasm -o ../../target/wasm32-wasip1/release/test_serper_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_web_search.wasm + - ../../../target/wasm32-wasip1/release/golem_websearch_serper.wasm + targets: + - ../../target/wasm32-wasip1/release/test_serper_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/release/test_serper_plugged.wasm + linkedWasm: ../../golem-temp/components/test_serper_release.wasm + clean: + - src/bindings.rs + + defaultProfile: google-debug + +dependencies: + test:web-search: + - target: test:helper + type: wasm-rpc diff --git a/test/components-rust/test-web-search/src/lib.rs 
b/test/components-rust/test-web-search/src/lib.rs new file mode 100644 index 000000000..8e51c907d --- /dev/null +++ b/test/components-rust/test-web-search/src/lib.rs @@ -0,0 +1,159 @@ +#[allow(static_mut_refs)] +mod bindings; + +use crate::bindings::exports::test::web_search_exports::test_web_search_api::*; +use crate::bindings::golem::web_search::web_search; +use crate::bindings::test::helper_client::test_helper_client::TestHelperApi; +use golem_rust::atomically; + +struct Component; + +impl Guest for Component { + /// test1 demonstrates a simple web search query + fn test1() -> String { + let params = web_search::SearchParams { + query: "Rust programming language".to_string(), + safe_search: None, + language: None, + region: None, + max_results: Some(10), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Sending web search request..."); + let response = web_search::search_once(&params); + println!("Response: {:?}", response); + + match response { + Ok((results, metadata)) => { + format!( + "Found {} results.\nResults: {:?}\nMetadata: {:?}", + results.len(), + results, + metadata + ) + } + Err(error) => { + format!("ERROR: {:?}", error) + } + } + } + + /// test2 demonstrates a more complex web search query with multiple terms + fn test2() -> String { + let params = web_search::SearchParams { + query: "WebAssembly WASI components tutorial".to_string(), + safe_search: None, + language: None, + region: None, + max_results: Some(5), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Sending complex web search request..."); + let response = web_search::search_once(&params); + println!("Response: {:?}", response); + + match response { + Ok((results, metadata)) => { + format!( + "Found {} results.\nResults: {:?}\nMetadata: {:?}", + results.len(), + results, +
metadata + ) + } + Err(error) => { + format!("ERROR: {:?}", error) + } + } + } + + /// test3 demonstrates error handling with an invalid search query + fn test3() -> String { + let params = web_search::SearchParams { + query: "".to_string(), + safe_search: None, + language: None, + region: None, + max_results: Some(10), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Sending empty web search request..."); + let response = web_search::search_once(&params); + println!("Response: {:?}", response); + + match response { + Ok((results, _metadata)) => { + format!("Unexpected success with {} results", results.len()) + } + Err(error) => { + format!("Expected error: {:?}", error) + } + } + } + + /// test4 simulates a crash during a web search, but only first time. + /// after the automatic recovery it will continue and finish the request successfully. + fn test4() -> String { + let params = web_search::SearchParams { + query: "Golem cloud WebAssembly components".to_string(), + safe_search: None, + language: None, + region: None, + max_results: Some(10), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let name = std::env::var("GOLEM_WORKER_NAME").unwrap(); + + atomically(|| { + let client = TestHelperApi::new(&name); + let counter = client.blocking_inc_and_get(); + if counter == 1 { + panic!("Simulating crash during web search") + } + }); + + println!("Sending web search request after recovery..."); + let response = web_search::search_once(&params); + println!("Response: {:?}", response); + + match response { + Ok((results, metadata)) => { + format!( + "Found {} results.\nResults: {:?}\nMetadata: {:?}", + results.len(), + results, + metadata + ) + } + Err(error) => { + format!("ERROR after recovery: {:?}", error) + } + } + } +} + +bindings::export!(Component with_types_in
bindings); diff --git a/test/components-rust/test-web-search/wit/test-web-search.wit b/test/components-rust/test-web-search/wit/test-web-search.wit new file mode 100644 index 000000000..333537d1d --- /dev/null +++ b/test/components-rust/test-web-search/wit/test-web-search.wit @@ -0,0 +1,15 @@ +package test:web-search; + +// See https://component-model.bytecodealliance.org/design/wit.html for more details about the WIT syntax + +interface test-web-search-api { + test1: func() -> string; + test2: func() -> string; + test3: func() -> string; + test4: func() -> string; +} + +world test-web-search { + import golem:web-search/web-search@1.0.0; + export test-web-search-api; +} diff --git a/test/wit/deps/golem-web-search/golem-web-search.wit b/test/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/test/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: 
option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/Makefile.toml b/web-search/Makefile.toml new file mode 100644 index 000000000..d9e8f5843 --- /dev/null +++ b/web-search/Makefile.toml @@ -0,0 +1,169 @@ +[config] +default_to_workspace = false +skip_core_tasks = true + +[tasks.build] +run_task = { name = [ + "build-google", + "build-brave", + "build-tavily", + "build-serper", +] } + +[tasks.build-portable] +run_task = { name = [ + "build-google-portable", + "build-brave-portable", + "build-tavily-portable", + "build-serper-portable", +] } + +[tasks.release-build] +run_task = { name = [ + "release-build-google", + "release-build-brave", + "release-build-tavily", + "release-build-serper", +] } + +[tasks.release-build-portable] +run_task = { name = [ + "release-build-google-portable", + "release-build-brave-portable", + "release-build-tavily-portable", + 
"release-build-serper-portable", +] } + +[tasks.build-google] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-google"] + +[tasks.build-google-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-google", "--no-default-features"] + +[tasks.build-brave] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-brave"] + +[tasks.build-brave-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-brave", "--no-default-features"] + +[tasks.build-tavily] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-tavily"] + +[tasks.build-tavily-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-tavily", "--no-default-features"] + +[tasks.build-serper] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-serper"] + +[tasks.build-serper-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-serper", "--no-default-features"] + +[tasks.release-build-google] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-google", "--release"] + +[tasks.release-build-google-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-google", "--release", "--no-default-features"] + 
+[tasks.release-build-brave] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-brave", "--release"] + +[tasks.release-build-brave-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-brave", "--release", "--no-default-features"] + +[tasks.release-build-tavily] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-tavily", "--release"] + +[tasks.release-build-tavily-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-tavily", "--release", "--no-default-features"] + +[tasks.release-build-serper] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-serper", "--release"] + +[tasks.release-build-serper-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-websearch-serper", "--release", "--no-default-features"] + +[tasks.wit-update] +install_crate = { crate_name = "wit-deps-cli" } +command = "wit-deps" +args = ["update"] + +[tasks.wit] +dependencies = ["wit-update"] + +script_runner = "@duckscript" +script = """ +modules = array web-search google brave tavily serper + +for module in ${modules} + rm -r ${module}/wit/deps + mkdir ${module}/wit/deps/golem-web-search + cp wit/golem-web-search.wit ${module}/wit/deps/golem-web-search/golem-web-search.wit + cp wit/deps/wasi:io ${module}/wit/deps + + echo "Copied WIT for module web-search::${module}" +end + +# Copy WIT files for integration tests +rm -r test/wit +mkdir test/wit/deps/golem-web-search +mkdir test/wit/deps/io +cp wit/golem-web-search.wit 
test/wit/deps/golem-web-search/golem-web-search.wit +cp wit/deps/wasi:io/error.wit test/wit/deps/io/error.wit +cp wit/deps/wasi:io/poll.wit test/wit/deps/io/poll.wit +cp wit/deps/wasi:io/streams.wit test/wit/deps/io/streams.wit +cp wit/deps/wasi:io/world.wit test/wit/deps/io/world.wit + +echo "Copied WIT for module test" +""" + +[tasks.build-test-components] +dependencies = ["build"] +install_crate = "cargo-binstall" +description = "Builds web-search test components with golem-cli" +script = ''' +cargo-binstall golem-cli@1.2.2-dev.11 --locked --no-confirm +cargo-binstall wac-cli --locked --no-confirm +cd ../test + +golem-cli --version +golem-cli app clean +golem-cli app build -b google-debug +golem-cli app clean +golem-cli app build -b brave-debug +golem-cli app clean +golem-cli app build -b tavily-debug +golem-cli app clean +golem-cli app build -b serper-debug +''' \ No newline at end of file diff --git a/web-search/brave/Cargo.toml b/web-search/brave/Cargo.toml new file mode 100644 index 000000000..b8b2a90b3 --- /dev/null +++ b/web-search/brave/Cargo.toml @@ -0,0 +1,43 @@ +[package] +name = "golem-websearch-brave" +version = "0.0.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for working with Brave Search API, with special support for Golem Cloud" + +[lib] +path = "src/lib.rs" +crate-type = ["cdylib"] + +[features] +default = ["durability"] +durability = ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../web-search", version = "0.0.0", default-features = false } + +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } + +[package.metadata.component] +package = "golem:websearch-brave" + 
+[package.metadata.component.bindings] +generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:web-search" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io"} diff --git a/web-search/brave/src/bindings.rs b/web-search/brave/src/bindings.rs new file mode 100644 index 000000000..55de799c4 --- /dev/null +++ b/web-search/brave/src/bindings.rs @@ -0,0 +1,318 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! +// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" +// * generate_unused_types +use golem_web_search::golem::web_search::web_search as __with_name0; +#[rustfmt::skip] +#[allow(dead_code, clippy::all)] +pub mod golem { + pub mod web_search { + #[allow(dead_code, async_fn_in_trait, unused_imports, clippy::all)] + pub mod types { + #[used] + #[doc(hidden)] + static __FORCE_SECTION_REF: fn() = super::super::super::__link_custom_section_describing_imports; + use super::super::super::_rt; + /// Optional image-related result data + #[derive(Clone)] + pub struct ImageResult { + pub url: _rt::String, + pub description: Option<_rt::String>, + } + impl ::core::fmt::Debug for ImageResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("ImageResult") + .field("url", &self.url) + .field("description", &self.description) + .finish() + } + } + /// Core structure for a single search result + #[derive(Clone)] + pub struct SearchResult { + pub title: _rt::String, + pub url: _rt::String, + pub snippet: _rt::String, + pub display_url: Option<_rt::String>, + pub source: Option<_rt::String>, + pub score: Option, + pub html_snippet: Option<_rt::String>, + pub date_published: 
Option<_rt::String>, + pub images: Option<_rt::Vec>, + pub content_chunks: Option<_rt::Vec<_rt::String>>, + } + impl ::core::fmt::Debug for SearchResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchResult") + .field("title", &self.title) + .field("url", &self.url) + .field("snippet", &self.snippet) + .field("display-url", &self.display_url) + .field("source", &self.source) + .field("score", &self.score) + .field("html-snippet", &self.html_snippet) + .field("date-published", &self.date_published) + .field("images", &self.images) + .field("content-chunks", &self.content_chunks) + .finish() + } + } + /// Safe search settings + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum SafeSearchLevel { + Off, + Medium, + High, + } + impl ::core::fmt::Debug for SafeSearchLevel { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SafeSearchLevel::Off => { + f.debug_tuple("SafeSearchLevel::Off").finish() + } + SafeSearchLevel::Medium => { + f.debug_tuple("SafeSearchLevel::Medium").finish() + } + SafeSearchLevel::High => { + f.debug_tuple("SafeSearchLevel::High").finish() + } + } + } + } + impl SafeSearchLevel { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> SafeSearchLevel { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => SafeSearchLevel::Off, + 1 => SafeSearchLevel::Medium, + 2 => SafeSearchLevel::High, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Rate limiting metadata + #[repr(C)] + #[derive(Clone, Copy)] + pub struct RateLimitInfo { + pub limit: u32, + pub remaining: u32, + pub reset_timestamp: u64, + } + impl ::core::fmt::Debug for RateLimitInfo { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("RateLimitInfo") + .field("limit", &self.limit) + .field("remaining", &self.remaining) + .field("reset-timestamp", 
&self.reset_timestamp) + .finish() + } + } + /// Optional metadata for a search session + #[derive(Clone)] + pub struct SearchMetadata { + pub query: _rt::String, + pub total_results: Option, + pub search_time_ms: Option, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: Option<_rt::String>, + pub next_page_token: Option<_rt::String>, + pub rate_limits: Option, + } + impl ::core::fmt::Debug for SearchMetadata { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchMetadata") + .field("query", &self.query) + .field("total-results", &self.total_results) + .field("search-time-ms", &self.search_time_ms) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("next-page-token", &self.next_page_token) + .field("rate-limits", &self.rate_limits) + .finish() + } + } + /// Supported time range filtering + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum TimeRange { + Day, + Week, + Month, + Year, + } + impl ::core::fmt::Debug for TimeRange { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + TimeRange::Day => f.debug_tuple("TimeRange::Day").finish(), + TimeRange::Week => f.debug_tuple("TimeRange::Week").finish(), + TimeRange::Month => f.debug_tuple("TimeRange::Month").finish(), + TimeRange::Year => f.debug_tuple("TimeRange::Year").finish(), + } + } + } + impl TimeRange { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> TimeRange { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => TimeRange::Day, + 1 => TimeRange::Week, + 2 => TimeRange::Month, + 3 => TimeRange::Year, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Query parameters accepted by the unified search API + #[derive(Clone)] + pub struct SearchParams { + pub query: _rt::String, + pub safe_search: Option, + pub language: 
Option<_rt::String>, + pub region: Option<_rt::String>, + pub max_results: Option, + pub time_range: Option, + pub include_domains: Option<_rt::Vec<_rt::String>>, + pub exclude_domains: Option<_rt::Vec<_rt::String>>, + pub include_images: Option, + pub include_html: Option, + pub advanced_answer: Option, + } + impl ::core::fmt::Debug for SearchParams { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchParams") + .field("query", &self.query) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("max-results", &self.max_results) + .field("time-range", &self.time_range) + .field("include-domains", &self.include_domains) + .field("exclude-domains", &self.exclude_domains) + .field("include-images", &self.include_images) + .field("include-html", &self.include_html) + .field("advanced-answer", &self.advanced_answer) + .finish() + } + } + /// Structured search error + #[derive(Clone)] + pub enum SearchError { + InvalidQuery, + RateLimited(u32), + UnsupportedFeature(_rt::String), + BackendError(_rt::String), + } + impl ::core::fmt::Debug for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SearchError::InvalidQuery => { + f.debug_tuple("SearchError::InvalidQuery").finish() + } + SearchError::RateLimited(e) => { + f.debug_tuple("SearchError::RateLimited").field(e).finish() + } + SearchError::UnsupportedFeature(e) => { + f.debug_tuple("SearchError::UnsupportedFeature") + .field(e) + .finish() + } + SearchError::BackendError(e) => { + f.debug_tuple("SearchError::BackendError").field(e).finish() + } + } + } + } + impl ::core::fmt::Display for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + write!(f, "{:?}", self) + } + } + impl std::error::Error for SearchError {} + } + } +} +#[rustfmt::skip] +mod _rt { + #![allow(dead_code, 
clippy::all)] + pub use alloc_crate::string::String; + pub use alloc_crate::vec::Vec; + extern crate alloc as alloc_crate; +} +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-tavily@1.0.0:web-search-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1377] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xd8\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x08\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05month\x04year\x04\ +\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05querys\x0bsafe-se\ +arch\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-range\x16\x0finclu\ +de-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0cinclude-html\x17\x0f\ +advanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\x0dinvalid-query\0\ +\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dbackend-error\x01s\0\x04\ +\0\x0csearch-error\x03\0\x1a\x03\0\x1cgolem:web-search/types@1.0.0\x05\0\x02\x03\ +\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\0\0\x0fsearch-metadat\ +a\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\x04\0\x0dsearch-param\ +s\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\x02\x02\x03\x02\x01\x03\ 
+\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\0\x0csearch-error\x03\ +\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01j\x01\x03\x01\x07\x01@\x01\x04\ +self\x09\0\x0a\x04\0\x20[method]search-session.next-page\x01\x0b\x01k\x05\x01@\x01\ +\x04self\x09\0\x0c\x04\0#[method]search-session.get-metadata\x01\x0d\x01i\x08\x01\ +j\x01\x0e\x01\x07\x01@\x01\x06params\x01\0\x0f\x04\0\x0cstart-search\x01\x10\x01\ +p\x03\x01o\x02\x11\x0c\x01j\x01\x12\x01\x07\x01@\x01\x06params\x01\0\x13\x04\0\x0b\ +search-once\x01\x14\x04\0!golem:web-search/web-search@1.0.0\x05\x05\x04\00golem:\ +web-search-tavily/web-search-library@1.0.0\x04\0\x0b\x18\x01\0\x12web-search-lib\ +rary\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\ +\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/web-search/brave/src/client.rs b/web-search/brave/src/client.rs new file mode 100644 index 000000000..ff5faf17f --- /dev/null +++ b/web-search/brave/src/client.rs @@ -0,0 +1,393 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::types::SearchError; +use log::trace; +use reqwest::{header, Client, Method, Response, StatusCode}; +use serde::{Deserialize, Serialize}; + +const BASE_URL: &str = "https://api.search.brave.com/res/v1/web/search"; + +pub struct BraveSearchApi { + api_key: String, + client: Client, +} + +impl BraveSearchApi { + pub fn new(api_key: String) -> Self { + let client = Client::builder() + .build() + .expect("Failed to initialize HTTP client"); + Self { api_key, client } + } + + pub fn search(&self, request: BraveSearchRequest) -> Result { + trace!("Sending request to Brave Search API: {request:?}"); + + let response: Response = self + .client + .request(Method::GET, BASE_URL) + .header(header::ACCEPT, "application/json") + .header(header::ACCEPT_ENCODING, "identity") + 
.header(header::USER_AGENT, "golem-websearch/1.0") + .header("x-subscription-token", &self.api_key) + .query(&request) + .send() + .map_err(|err| from_reqwest_error("Request failed", err))?; + + parse_response(response) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveSearchRequest { + pub q: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub country: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub search_lang: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ui_lang: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub offset: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub safesearch: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub freshness: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub text_decorations: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub spellcheck: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub result_filter: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub goggles_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub units: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveSearchResponse { + #[serde(skip_serializing_if = "Option::is_none")] + pub query: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub web: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub discussions: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub infobox: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub videos: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub mixed: Option, + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub response_type: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveQuery { + pub original: String, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub show_strict_warning: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_navigational: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_news_breaking: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub spellcheck_off: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub country: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub bad_results: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub should_fallback: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub postal_code: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub city: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub header_country: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub more_results_available: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub state: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveWebResults { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub results: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub family_friendly: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveWebResult { + pub title: String, + pub url: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_source_local: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_source_both: Option, + pub description: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub page_age: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub profile: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub language: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub family_friendly: Option, + #[serde(rename = "type")] + #[serde(skip_serializing_if 
= "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub subtype: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub meta_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub thumbnail: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub age: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub extra_snippets: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveProfile { + pub name: String, + pub url: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub long_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub img: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveMetaUrl { + #[serde(skip_serializing_if = "Option::is_none")] + pub scheme: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub netloc: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub hostname: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub favicon: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub path: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveThumbnail { + pub src: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub original: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub logo: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveDiscussions { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub results: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub mutated_by_goggles: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveDiscussionResult { + pub title: String, + pub url: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_source_local: Option, + #[serde(skip_serializing_if = 
"Option::is_none")] + pub is_source_both: Option, + pub description: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub language: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub family_friendly: Option, + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub subtype: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub meta_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub age: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub data: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveDiscussionData { + #[serde(skip_serializing_if = "Option::is_none")] + pub forum_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub num_answers: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub score: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub question: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub top_comment: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveInfobox { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub results: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveInfoboxResult { + pub title: String, + pub url: String, + pub description: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub long_desc: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub category: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub position: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub label: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub website_url: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub attributes: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub profiles: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub images: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub providers: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveImage { + pub src: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub alt: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub original: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub logo: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveProvider { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub provider_type: Option, + pub name: String, + pub url: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub img: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveVideos { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub results: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub mutated_by_goggles: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveVideoResult { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + pub url: String, + pub title: String, + pub description: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub age: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub page_age: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub video: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub meta_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub thumbnail: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveVideoInfo { + 
#[serde(skip_serializing_if = "Option::is_none")] + pub duration: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub views: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub creator: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub publisher: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveMixed { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub result_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub main: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub top: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub side: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct BraveMixedItem { + #[serde(rename = "type")] + pub item_type: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub index: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub all: Option, +} +
+/// Turns an HTTP response from the Brave Search endpoint into a parsed
+/// [`BraveSearchResponse`] or a [`SearchError`].
+///
+/// * `200 OK`: the body is read as text and deserialized from JSON. A read or
+///   parse failure becomes `SearchError::BackendError`; the parse error message
+///   includes the raw body.
+/// * `429 Too Many Requests`: `SearchError::RateLimited(60)`.
+/// * `400 Bad Request`: `SearchError::InvalidQuery`.
+/// * Any other status: `SearchError::BackendError` carrying the status and the
+///   raw body.
+fn parse_response(response: Response) -> Result<BraveSearchResponse, SearchError> {
+    // Read the status once, before the body is consumed by `text()`.
+    let status = response.status();
+    match status {
+        StatusCode::OK => {
+            let body = response.text().map_err(|e| {
+                SearchError::BackendError(format!("Failed to read response body: {e}"))
+            })?;
+            serde_json::from_str::<BraveSearchResponse>(&body).map_err(|e| {
+                SearchError::BackendError(format!(
+                    "Failed to parse response: {e} \nRaw body: {body}"
+                ))
+            })
+        }
+        // The value 60 is a fixed constant; nothing in the response is consulted.
+        StatusCode::TOO_MANY_REQUESTS => Err(SearchError::RateLimited(60)),
+        StatusCode::BAD_REQUEST => Err(SearchError::InvalidQuery),
+        _ => {
+            // Keep the failure visible instead of reporting an empty body when
+            // the error payload itself cannot be read.
+            let body = response
+                .text()
+                .unwrap_or_else(|e| format!("<failed to read response body: {e}>"));
+            Err(SearchError::BackendError(format!(
+                "Request failed: {status} \nRaw body: {body}"
+            )))
+        }
+    }
+}
diff --git a/web-search/brave/src/conversions.rs b/web-search/brave/src/conversions.rs new file mode 100644 index 000000000..5b9989955 --- /dev/null +++ b/web-search/brave/src/conversions.rs @@ -0,0 +1,609 @@ +#[cfg(test)] +use
crate::client::{BraveQuery, BraveWebResult, BraveWebResults}; +use crate::client::{BraveSearchRequest, BraveSearchResponse}; +use golem_web_search::golem::web_search::types::{ + ImageResult, SafeSearchLevel, SearchMetadata, SearchParams, SearchResult, TimeRange, +}; + +pub fn convert_params_to_request(params: &SearchParams, offset: Option) -> BraveSearchRequest { + let mut request = BraveSearchRequest { + q: params.query.clone(), + country: params + .region + .as_ref() + .and_then(|r| country_code_to_brave(r)), + search_lang: params + .language + .as_ref() + .and_then(|l| language_code_to_brave(l)), + ui_lang: None, + count: params.max_results, + offset, + safesearch: params.safe_search.as_ref().map(|s| match s { + SafeSearchLevel::Off => "off".to_string(), + SafeSearchLevel::Medium => "moderate".to_string(), + SafeSearchLevel::High => "strict".to_string(), + }), + freshness: params.time_range.as_ref().map(|tr| match tr { + TimeRange::Day => "pd".to_string(), + TimeRange::Week => "pw".to_string(), + TimeRange::Month => "pm".to_string(), + TimeRange::Year => "py".to_string(), + }), + text_decorations: Some(false), + spellcheck: Some(true), + result_filter: None, + goggles_id: None, + units: Some("metric".to_string()), + }; + + if let Some(include_domains) = ¶ms.include_domains { + if !include_domains.is_empty() { + let domain_query = include_domains + .iter() + .map(|domain| format!("site:{domain}")) + .collect::>() + .join(" OR "); + request.q = format!("{} ({})", request.q, domain_query); + } + } else if let Some(exclude_domains) = ¶ms.exclude_domains { + if !exclude_domains.is_empty() { + let domain_query = exclude_domains + .iter() + .map(|domain| format!("-site:{domain}")) + .collect::>() + .join(" "); + request.q = format!("{} {}", request.q, domain_query); + } + } + + request +} + +pub fn country_code_to_brave(country_code: &str) -> Option { + match country_code.to_lowercase().as_str() { + "us" | "usa" | "united states" => Some("us".to_string()), + "uk" | 
/// Normalizes a region given as a country code or English country name into
/// the lower-case two-letter code placed in the request's `country` field.
///
/// Matching is case-insensitive. Known English names and a few aliases
/// (`"usa"`, `"uk"`, `"uae"`) are mapped to their code; any other input is
/// passed through lower-cased. The function therefore always returns `Some`.
pub fn country_code_to_brave(country_code: &str) -> Option<String> {
    let lowered = country_code.to_lowercase();

    // Only aliases need an arm: a recognised two-letter code maps to itself,
    // which is exactly what the pass-through arm produces.
    let code = match lowered.as_str() {
        "usa" | "united states" => "us",
        "uk" | "united kingdom" => "gb",
        "canada" => "ca",
        "australia" => "au",
        "germany" => "de",
        "france" => "fr",
        "spain" => "es",
        "italy" => "it",
        "japan" => "jp",
        "brazil" => "br",
        "india" => "in",
        "china" => "cn",
        "russia" => "ru",
        "mexico" => "mx",
        "argentina" => "ar",
        "chile" => "cl",
        "colombia" => "co",
        "peru" => "pe",
        "south africa" => "za",
        "nigeria" => "ng",
        "egypt" => "eg",
        "south korea" => "kr",
        "thailand" => "th",
        "singapore" => "sg",
        "malaysia" => "my",
        "indonesia" => "id",
        "philippines" => "ph",
        "vietnam" => "vn",
        "taiwan" => "tw",
        "hong kong" => "hk",
        "netherlands" => "nl",
        "belgium" => "be",
        "switzerland" => "ch",
        "austria" => "at",
        "sweden" => "se",
        "norway" => "no",
        "denmark" => "dk",
        "finland" => "fi",
        "poland" => "pl",
        "czech republic" => "cz",
        "hungary" => "hu",
        "greece" => "gr",
        "portugal" => "pt",
        "turkey" => "tr",
        "israel" => "il",
        "uae" | "united arab emirates" => "ae",
        "saudi arabia" => "sa",
        "new zealand" => "nz",
        other => other,
    };

    Some(code.to_owned())
}
/// Normalizes a language given as a code, a `lang_`-prefixed code, or an
/// English language name into the lower-case code placed in the request's
/// `search_lang` field.
///
/// The input is lower-cased and one leading `lang_` is removed. Known English
/// names are mapped to their code; anything else is passed through as-is
/// after that normalization. The function therefore always returns `Some`.
pub fn language_code_to_brave(language_code: &str) -> Option<String> {
    let lowered = language_code.to_lowercase();
    let stripped = lowered.strip_prefix("lang_").unwrap_or(&lowered);

    // Only names need an arm: a recognised code maps to itself, which is
    // exactly what the pass-through arm produces.
    let code = match stripped {
        "english" => "en",
        "spanish" => "es",
        "french" => "fr",
        "german" => "de",
        "italian" => "it",
        "portuguese" => "pt",
        "russian" => "ru",
        "chinese" => "zh",
        "japanese" => "ja",
        "korean" => "ko",
        "arabic" => "ar",
        "hindi" => "hi",
        "thai" => "th",
        "vietnamese" => "vi",
        "indonesian" => "id",
        "malay" => "ms",
        "tagalog" => "tl",
        "dutch" => "nl",
        "swedish" => "sv",
        "norwegian" => "no",
        "danish" => "da",
        "finnish" => "fi",
        "polish" => "pl",
        "czech" => "cs",
        "hungarian" => "hu",
        "greek" => "el",
        "turkish" => "tr",
        "hebrew" => "he",
        "persian" => "fa",
        "urdu" => "ur",
        "bengali" => "bn",
        "tamil" => "ta",
        "telugu" => "te",
        "malayalam" => "ml",
        "kannada" => "kn",
        "gujarati" => "gu",
        "punjabi" => "pa",
        "marathi" => "mr",
        "nepali" => "ne",
        "sinhala" => "si",
        "myanmar" => "my",
        "khmer" => "km",
        "lao" => "lo",
        "georgian" => "ka",
        "armenian" => "hy",
        "azerbaijani" => "az",
        "kazakh" => "kk",
        "kyrgyz" => "ky",
        "mongolian" => "mn",
        "uzbek" => "uz",
        "ukrainian" => "uk",
        "bulgarian" => "bg",
        "croatian" => "hr",
        "serbian" => "sr",
        "bosnian" => "bs",
        "macedonian" => "mk",
        "slovenian" => "sl",
        "slovak" => "sk",
        "romanian" => "ro",
        "latvian" => "lv",
        "lithuanian" => "lt",
        "estonian" => "et",
        "maltese" => "mt",
        "icelandic" => "is",
        "irish" => "ga",
        "welsh" => "cy",
        "basque" => "eu",
        "catalan" => "ca",
        "galician" => "gl",
        "afrikaans" => "af",
        "swahili" => "sw",
        "amharic" => "am",
        "oriya" => "or",
        "assamese" => "as",
        "sindhi" => "sd",
        "pashto" => "ps",
        "tajik" => "tg",
        "turkmen" => "tk",
        other => other,
    };

    Some(code.to_owned())
}
Some("ps".to_string()), + "tg" | "tajik" => Some("tg".to_string()), + "tk" | "turkmen" => Some("tk".to_string()), + _ => Some(lang_code.to_string()), + } +} + +pub fn convert_response_to_results( + response: BraveSearchResponse, + params: &SearchParams, +) -> (Vec, Option) { + let mut results = Vec::new(); + + if let Some(web) = &response.web { + if let Some(web_results) = &web.results { + for item in web_results { + results.push(SearchResult { + title: item.title.clone(), + url: item.url.clone(), + snippet: item.description.clone(), + display_url: item.meta_url.as_ref().map(|meta| { + meta.hostname.clone().unwrap_or_else(|| { + item.profile + .as_ref() + .map(|p| p.long_name.clone().unwrap_or(p.name.clone())) + .unwrap_or(item.url.clone()) + }) + }), + source: Some("Brave".to_string()), + score: None, + html_snippet: if params.include_html.unwrap_or(false) { + Some(item.description.clone()) + } else { + None + }, + date_published: item.age.clone().or_else(|| item.page_age.clone()), + images: item.thumbnail.as_ref().map(|thumb| { + vec![ImageResult { + url: thumb.src.clone(), + description: thumb.original.clone(), + }] + }), + content_chunks: item.extra_snippets.clone(), + }); + } + } + } + + let total_results = response + .query + .as_ref() + .and_then(|q| q.more_results_available) + .map(|has_more| { + if has_more { + 1000u64 + } else { + results.len() as u64 + } + }); + + let next_page_token = response + .query + .as_ref() + .and_then(|q| q.more_results_available) + .filter(|&has_more| has_more) + .map(|_| { + let current_offset = params.max_results.unwrap_or(10); + (current_offset + params.max_results.unwrap_or(10)).to_string() + }); + + let metadata = SearchMetadata { + query: params.query.clone(), + total_results, + search_time_ms: None, + safe_search: params.safe_search, + language: params.language.clone(), + region: params.region.clone(), + next_page_token, + rate_limits: None, + }; + + (results, Some(metadata)) +} + +#[cfg(test)] +mod tests { + use 
super::*; + use crate::client::{BraveMetaUrl, BraveProfile, BraveThumbnail}; + + fn create_test_params() -> SearchParams { + SearchParams { + query: "test query".to_string(), + safe_search: Some(SafeSearchLevel::Medium), + language: Some("en".to_string()), + region: Some("us".to_string()), + max_results: Some(10), + time_range: Some(TimeRange::Week), + include_domains: Some(vec!["example.com".to_string(), "test.org".to_string()]), + exclude_domains: None, + include_images: Some(true), + include_html: Some(true), + advanced_answer: Some(false), + } + } + + fn create_test_response() -> BraveSearchResponse { + BraveSearchResponse { + query: Some(BraveQuery { + original: "test query".to_string(), + show_strict_warning: Some(false), + is_navigational: Some(false), + is_news_breaking: Some(false), + spellcheck_off: Some(false), + country: Some("us".to_string()), + bad_results: Some(false), + should_fallback: Some(false), + postal_code: None, + city: None, + header_country: None, + more_results_available: Some(true), + state: None, + }), + web: Some(BraveWebResults { + result_type: Some("search".to_string()), + results: Some(vec![ + BraveWebResult { + title: "Test Result 1".to_string(), + url: "https://example.com/1".to_string(), + is_source_local: Some(false), + is_source_both: Some(false), + description: "This is a test snippet 1".to_string(), + page_age: Some("2023-01-15T10:30:00".to_string()), + profile: Some(BraveProfile { + name: "Example".to_string(), + url: "https://example.com".to_string(), + long_name: Some("example.com".to_string()), + img: None, + }), + language: Some("en".to_string()), + family_friendly: Some(true), + result_type: Some("search_result".to_string()), + subtype: Some("generic".to_string()), + meta_url: Some(BraveMetaUrl { + scheme: Some("https".to_string()), + netloc: Some("example.com".to_string()), + hostname: Some("example.com".to_string()), + favicon: None, + path: Some("/1".to_string()), + }), + thumbnail: Some(BraveThumbnail { + src: 
"https://example.com/thumb1.jpg".to_string(), + original: Some("https://example.com/orig1.jpg".to_string()), + logo: Some(false), + }), + age: Some("2 days ago".to_string()), + extra_snippets: Some(vec!["Extra info 1".to_string()]), + }, + BraveWebResult { + title: "Test Result 2".to_string(), + url: "https://test.org/2".to_string(), + is_source_local: Some(false), + is_source_both: Some(false), + description: "This is a test snippet 2".to_string(), + page_age: None, + profile: None, + language: Some("en".to_string()), + family_friendly: Some(true), + result_type: Some("search_result".to_string()), + subtype: Some("generic".to_string()), + meta_url: Some(BraveMetaUrl { + scheme: Some("https".to_string()), + netloc: Some("test.org".to_string()), + hostname: Some("test.org".to_string()), + favicon: None, + path: Some("/2".to_string()), + }), + thumbnail: None, + age: None, + extra_snippets: None, + }, + ]), + family_friendly: Some(true), + }), + discussions: None, + infobox: None, + videos: None, + mixed: None, + response_type: Some("search".to_string()), + } + } + + #[test] + fn test_convert_params_to_request_basic() { + let params = SearchParams { + query: "basic test".to_string(), + safe_search: None, + language: None, + region: None, + max_results: None, + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + + assert_eq!(request.q, "basic test"); + assert_eq!(request.country, None); + assert_eq!(request.search_lang, None); + assert_eq!(request.ui_lang, None); + assert_eq!(request.count, None); + assert_eq!(request.offset, None); + assert_eq!(request.safesearch, None); + assert_eq!(request.freshness, None); + assert_eq!(request.text_decorations, Some(false)); + assert_eq!(request.spellcheck, Some(true)); + assert_eq!(request.units, Some("metric".to_string())); + } + + #[test] + fn test_convert_params_to_request_full() 
{ + let params = create_test_params(); + let request = convert_params_to_request(¶ms, Some(20)); + + assert_eq!(request.q, "test query (site:example.com OR site:test.org)"); + assert_eq!(request.country, Some("us".to_string())); + assert_eq!(request.search_lang, Some("en".to_string())); + assert_eq!(request.ui_lang, None); + assert_eq!(request.count, Some(10)); + assert_eq!(request.offset, Some(20)); + assert_eq!(request.safesearch, Some("moderate".to_string())); + assert_eq!(request.freshness, Some("pw".to_string())); + } + + #[test] + fn test_convert_params_safe_search_levels() { + let test_cases = vec![ + (SafeSearchLevel::Off, "off"), + (SafeSearchLevel::Medium, "moderate"), + (SafeSearchLevel::High, "strict"), + ]; + + for (level, expected) in test_cases { + let params = SearchParams { + query: "test".to_string(), + safe_search: Some(level), + language: None, + region: None, + max_results: None, + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + assert_eq!(request.safesearch, Some(expected.to_string())); + } + } + + #[test] + fn test_convert_params_time_ranges() { + let test_cases = vec![ + (TimeRange::Day, "pd"), + (TimeRange::Week, "pw"), + (TimeRange::Month, "pm"), + (TimeRange::Year, "py"), + ]; + + for (range, expected) in test_cases { + let params = SearchParams { + query: "test".to_string(), + safe_search: None, + language: None, + region: None, + max_results: None, + time_range: Some(range), + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + assert_eq!(request.freshness, Some(expected.to_string())); + } + } + + #[test] + fn test_convert_params_exclude_domains() { + let params = SearchParams { + query: "test".to_string(), + safe_search: None, + language: None, + region: 
None, + max_results: None, + time_range: None, + include_domains: None, + exclude_domains: Some(vec!["spam.com".to_string(), "bad.org".to_string()]), + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + assert_eq!(request.q, "test -site:spam.com -site:bad.org"); + } + + #[test] + fn test_convert_response_to_results_basic() { + let params = create_test_params(); + let response = create_test_response(); + + let (results, metadata) = convert_response_to_results(response, ¶ms); + + assert_eq!(results.len(), 2); + + assert_eq!(results[0].title, "Test Result 1"); + assert_eq!(results[0].url, "https://example.com/1"); + assert_eq!(results[0].snippet, "This is a test snippet 1"); + assert_eq!(results[0].display_url, Some("example.com".to_string())); + assert_eq!(results[0].source, Some("Brave".to_string())); + assert_eq!(results[0].score, None); + assert_eq!( + results[0].html_snippet, + Some("This is a test snippet 1".to_string()) + ); + assert_eq!(results[0].date_published, Some("2 days ago".to_string())); + assert!(results[0].images.is_some()); + assert_eq!( + results[0].content_chunks, + Some(vec!["Extra info 1".to_string()]) + ); + + assert_eq!(results[1].title, "Test Result 2"); + assert_eq!(results[1].url, "https://test.org/2"); + assert_eq!(results[1].snippet, "This is a test snippet 2"); + assert_eq!(results[1].display_url, Some("test.org".to_string())); + + assert!(metadata.is_some()); + let meta = metadata.unwrap(); + assert_eq!(meta.query, "test query"); + assert_eq!(meta.total_results, Some(1000)); + assert_eq!(meta.search_time_ms, None); + assert_eq!(meta.safe_search, Some(SafeSearchLevel::Medium)); + assert_eq!(meta.language, Some("en".to_string())); + assert_eq!(meta.region, Some("us".to_string())); + assert_eq!(meta.next_page_token, Some("20".to_string())); + assert_eq!(meta.rate_limits, None); + } + + #[test] + fn test_convert_response_to_results_no_html() { + let mut params 
= create_test_params(); + params.include_html = Some(false); + let response = create_test_response(); + + let (results, _) = convert_response_to_results(response, ¶ms); + + assert_eq!(results.len(), 2); + assert_eq!(results[0].html_snippet, None); + assert_eq!(results[1].html_snippet, None); + } + + #[test] + fn test_convert_response_to_results_empty() { + let params = create_test_params(); + let response = BraveSearchResponse { + query: Some(BraveQuery { + original: "test query".to_string(), + show_strict_warning: None, + is_navigational: None, + is_news_breaking: None, + spellcheck_off: None, + country: None, + bad_results: None, + should_fallback: None, + postal_code: None, + city: None, + header_country: None, + more_results_available: Some(false), + state: None, + }), + web: None, + discussions: None, + infobox: None, + videos: None, + mixed: None, + response_type: None, + }; + + let (results, metadata) = convert_response_to_results(response, ¶ms); + + assert_eq!(results.len(), 0); + assert!(metadata.is_some()); + let meta = metadata.unwrap(); + assert_eq!(meta.query, "test query"); + assert_eq!(meta.total_results, Some(0)); + assert_eq!(meta.search_time_ms, None); + assert_eq!(meta.next_page_token, None); + } +} diff --git a/web-search/brave/src/lib.rs b/web-search/brave/src/lib.rs new file mode 100644 index 000000000..c1d6939d0 --- /dev/null +++ b/web-search/brave/src/lib.rs @@ -0,0 +1,117 @@ +use crate::client::BraveSearchApi; +use crate::conversions::{convert_params_to_request, convert_response_to_results}; +use golem_web_search::config::with_config_key; + +use golem_web_search::durability::DurableWebSearch; +use golem_web_search::golem::web_search::types::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; +use golem_web_search::golem_web_search::web_search::web_search::{ + Guest, GuestSearchSession, SearchSession, +}; + +use golem_web_search::LOGGING_STATE; +use std::cell::RefCell; + +mod client; +mod conversions; + +struct 
BraveWebSearchComponent; + +impl BraveWebSearchComponent { + const API_KEY_ENV_VAR: &'static str = "BRAVE_API_KEY"; +} + +pub struct BraveSearchSession { + client: BraveSearchApi, + params: SearchParams, + current_offset: RefCell, + last_metadata: RefCell>, + has_more_results: RefCell, +} + +impl BraveSearchSession { + fn new(client: BraveSearchApi, params: SearchParams) -> Self { + Self { + client, + params, + current_offset: RefCell::new(0), + last_metadata: RefCell::new(None), + has_more_results: RefCell::new(true), + } + } +} + +impl GuestSearchSession for BraveSearchSession { + fn next_page(&self) -> Result { + if !*self.has_more_results.borrow() { + return Err(SearchError::BackendError( + "No more results available".to_string(), + )); + } + + let page_size = self.params.max_results.unwrap_or(10); + let current_offset = *self.current_offset.borrow(); + let new_offset = current_offset + page_size; + *self.current_offset.borrow_mut() = new_offset; + + let request = convert_params_to_request(&self.params, Some(new_offset)); + let response = self.client.search(request)?; + let (results, metadata) = convert_response_to_results(response, &self.params); + + *self.last_metadata.borrow_mut() = metadata.clone(); + + if results.is_empty() { + *self.has_more_results.borrow_mut() = false; + return Err(SearchError::BackendError("No more results".to_string())); + } + + if let Some(metadata) = &metadata { + *self.has_more_results.borrow_mut() = metadata.next_page_token.is_some(); + } else { + *self.has_more_results.borrow_mut() = false; + } + + results + .into_iter() + .next() + .ok_or_else(|| SearchError::BackendError("No results returned".to_string())) + } + + fn get_metadata(&self) -> Option { + self.last_metadata.borrow().clone() + } +} + +impl Guest for BraveWebSearchComponent { + type SearchSession = BraveSearchSession; + + fn start_search(params: SearchParams) -> Result { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + 
// WIT package for the Brave web-search provider component.
// The package name identifies this provider; it previously carried the
// Tavily provider's name.
package golem:web-search-brave@1.0.0;

world web-search-library {
    include golem:web-search/web-search-library@1.0.0;
}
Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/brave/wit/deps/wasi:io/error.wit b/web-search/brave/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/brave/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + 
+@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/brave/wit/deps/wasi:io/poll.wit b/web-search/brave/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/brave/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. 
+ /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/brave/wit/deps/wasi:io/streams.wit b/web-search/brave/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/brave/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. 
+@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. 
+ /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. 
+ /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. 
+ /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. 
+ @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/brave/wit/deps/wasi:io/world.wit b/web-search/brave/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/brave/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/google/Cargo.toml b/web-search/google/Cargo.toml new file mode 100644 index 000000000..97f675b9d --- /dev/null +++ b/web-search/google/Cargo.toml @@ -0,0 +1,43 @@ +[package] +name = "golem-websearch-google" +version = "0.0.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for web search using the Google Custom Search API, with special support for Golem Cloud" + +[lib] +path = "src/lib.rs" +crate-type = ["cdylib"] + +[features] +default = ["durability"] +durability = ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../web-search", version = "0.0.0", default-features = false } + +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } + +[package.metadata.component] +package = "golem:websearch-google" + +[package.metadata.component.bindings] +generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:web-search" = { path = "wit/deps/golem-web-search" }
+"wasi:io" = { path = "wit/deps/wasi:io"} diff --git a/web-search/google/src/bindings.rs b/web-search/google/src/bindings.rs new file mode 100644 index 000000000..2f0ee2902 --- /dev/null +++ b/web-search/google/src/bindings.rs @@ -0,0 +1,318 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! +// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" +// * generate_unused_types +use golem_web_search::golem::web_search::web_search as __with_name0; +#[rustfmt::skip] +#[allow(dead_code, clippy::all)] +pub mod golem { + pub mod web_search { + #[allow(dead_code, async_fn_in_trait, unused_imports, clippy::all)] + pub mod types { + #[used] + #[doc(hidden)] + static __FORCE_SECTION_REF: fn() = super::super::super::__link_custom_section_describing_imports; + use super::super::super::_rt; + /// Optional image-related result data + #[derive(Clone)] + pub struct ImageResult { + pub url: _rt::String, + pub description: Option<_rt::String>, + } + impl ::core::fmt::Debug for ImageResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("ImageResult") + .field("url", &self.url) + .field("description", &self.description) + .finish() + } + } + /// Core structure for a single search result + #[derive(Clone)] + pub struct SearchResult { + pub title: _rt::String, + pub url: _rt::String, + pub snippet: _rt::String, + pub display_url: Option<_rt::String>, + pub source: Option<_rt::String>, + pub score: Option, + pub html_snippet: Option<_rt::String>, + pub date_published: Option<_rt::String>, + pub images: Option<_rt::Vec>, + pub content_chunks: Option<_rt::Vec<_rt::String>>, + } + impl ::core::fmt::Debug for SearchResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchResult") + .field("title", &self.title) + .field("url", &self.url) + .field("snippet", &self.snippet) + 
.field("display-url", &self.display_url) + .field("source", &self.source) + .field("score", &self.score) + .field("html-snippet", &self.html_snippet) + .field("date-published", &self.date_published) + .field("images", &self.images) + .field("content-chunks", &self.content_chunks) + .finish() + } + } + /// Safe search settings + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum SafeSearchLevel { + Off, + Medium, + High, + } + impl ::core::fmt::Debug for SafeSearchLevel { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SafeSearchLevel::Off => { + f.debug_tuple("SafeSearchLevel::Off").finish() + } + SafeSearchLevel::Medium => { + f.debug_tuple("SafeSearchLevel::Medium").finish() + } + SafeSearchLevel::High => { + f.debug_tuple("SafeSearchLevel::High").finish() + } + } + } + } + impl SafeSearchLevel { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> SafeSearchLevel { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => SafeSearchLevel::Off, + 1 => SafeSearchLevel::Medium, + 2 => SafeSearchLevel::High, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Rate limiting metadata + #[repr(C)] + #[derive(Clone, Copy)] + pub struct RateLimitInfo { + pub limit: u32, + pub remaining: u32, + pub reset_timestamp: u64, + } + impl ::core::fmt::Debug for RateLimitInfo { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("RateLimitInfo") + .field("limit", &self.limit) + .field("remaining", &self.remaining) + .field("reset-timestamp", &self.reset_timestamp) + .finish() + } + } + /// Optional metadata for a search session + #[derive(Clone)] + pub struct SearchMetadata { + pub query: _rt::String, + pub total_results: Option, + pub search_time_ms: Option, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: Option<_rt::String>, + pub next_page_token: Option<_rt::String>, + pub 
rate_limits: Option, + } + impl ::core::fmt::Debug for SearchMetadata { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchMetadata") + .field("query", &self.query) + .field("total-results", &self.total_results) + .field("search-time-ms", &self.search_time_ms) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("next-page-token", &self.next_page_token) + .field("rate-limits", &self.rate_limits) + .finish() + } + } + /// Supported time range filtering + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum TimeRange { + Day, + Week, + Month, + Year, + } + impl ::core::fmt::Debug for TimeRange { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + TimeRange::Day => f.debug_tuple("TimeRange::Day").finish(), + TimeRange::Week => f.debug_tuple("TimeRange::Week").finish(), + TimeRange::Month => f.debug_tuple("TimeRange::Month").finish(), + TimeRange::Year => f.debug_tuple("TimeRange::Year").finish(), + } + } + } + impl TimeRange { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> TimeRange { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => TimeRange::Day, + 1 => TimeRange::Week, + 2 => TimeRange::Month, + 3 => TimeRange::Year, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Query parameters accepted by the unified search API + #[derive(Clone)] + pub struct SearchParams { + pub query: _rt::String, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: Option<_rt::String>, + pub max_results: Option, + pub time_range: Option, + pub include_domains: Option<_rt::Vec<_rt::String>>, + pub exclude_domains: Option<_rt::Vec<_rt::String>>, + pub include_images: Option, + pub include_html: Option, + pub advanced_answer: Option, + } + impl ::core::fmt::Debug for SearchParams { + fn fmt( + &self, + f: 
&mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchParams") + .field("query", &self.query) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("max-results", &self.max_results) + .field("time-range", &self.time_range) + .field("include-domains", &self.include_domains) + .field("exclude-domains", &self.exclude_domains) + .field("include-images", &self.include_images) + .field("include-html", &self.include_html) + .field("advanced-answer", &self.advanced_answer) + .finish() + } + } + /// Structured search error + #[derive(Clone)] + pub enum SearchError { + InvalidQuery, + RateLimited(u32), + UnsupportedFeature(_rt::String), + BackendError(_rt::String), + } + impl ::core::fmt::Debug for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SearchError::InvalidQuery => { + f.debug_tuple("SearchError::InvalidQuery").finish() + } + SearchError::RateLimited(e) => { + f.debug_tuple("SearchError::RateLimited").field(e).finish() + } + SearchError::UnsupportedFeature(e) => { + f.debug_tuple("SearchError::UnsupportedFeature") + .field(e) + .finish() + } + SearchError::BackendError(e) => { + f.debug_tuple("SearchError::BackendError").field(e).finish() + } + } + } + } + impl ::core::fmt::Display for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + write!(f, "{:?}", self) + } + } + impl std::error::Error for SearchError {} + } + } +} +#[rustfmt::skip] +mod _rt { + #![allow(dead_code, clippy::all)] + pub use alloc_crate::string::String; + pub use alloc_crate::vec::Vec; + extern crate alloc as alloc_crate; +} +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-google@1.0.0:web-search-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 
1377] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xd8\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x08\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05month\x04year\x04\ +\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05querys\x0bsafe-se\ +arch\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-range\x16\x0finclu\ +de-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0cinclude-html\x17\x0f\ +advanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\x0dinvalid-query\0\ +\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dbackend-error\x01s\0\x04\ +\0\x0csearch-error\x03\0\x1a\x03\0\x1cgolem:web-search/types@1.0.0\x05\0\x02\x03\ +\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\0\0\x0fsearch-metadat\ +a\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\x04\0\x0dsearch-param\ +s\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\x02\x02\x03\x02\x01\x03\ +\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\0\x0csearch-error\x03\ +\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01j\x01\x03\x01\x07\x01@\x01\x04\ +self\x09\0\x0a\x04\0\x20[method]search-session.next-page\x01\x0b\x01k\x05\x01@\x01\ +\x04self\x09\0\x0c\x04\0#[method]search-session.get-metadata\x01\x0d\x01i\x08\x01\ 
+j\x01\x0e\x01\x07\x01@\x01\x06params\x01\0\x0f\x04\0\x0cstart-search\x01\x10\x01\ +p\x03\x01o\x02\x11\x0c\x01j\x01\x12\x01\x07\x01@\x01\x06params\x01\0\x13\x04\0\x0b\ +search-once\x01\x14\x04\0!golem:web-search/web-search@1.0.0\x05\x05\x04\00golem:\ +web-search-google/web-search-library@1.0.0\x04\0\x0b\x18\x01\0\x12web-search-lib\ +rary\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\ +\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/web-search/google/src/client.rs b/web-search/google/src/client.rs new file mode 100644 index 000000000..e7fd8f889 --- /dev/null +++ b/web-search/google/src/client.rs @@ -0,0 +1,170 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::types::SearchError; +use log::trace; +use reqwest::{Client, Method, Response}; +use serde::{Deserialize, Serialize}; + +const BASE_URL: &str = "https://www.googleapis.com/customsearch/v1"; + +/// Google Search API Client +/// +/// Docs: https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list +pub struct GoogleSearchApi { + api_key: String, + search_engine_id: String, + client: Client, +} +
+impl GoogleSearchApi {
+    /// Creates a client that authenticates with `api_key` and queries the
+    /// search engine identified by `search_engine_id`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the underlying HTTP client cannot be built.
+    pub fn new(api_key: String, search_engine_id: String) -> Self {
+        let client = Client::builder()
+            .build()
+            .expect("Failed to initialize HTTP client");
+        Self {
+            api_key,
+            search_engine_id,
+            client,
+        }
+    }
+
+    /// Sends `request` to the Custom Search endpoint as a `GET` with the
+    /// request serialized into the query string, and parses the response.
+    ///
+    /// The `key` and `cx` fields of `request` are overwritten with this
+    /// client's API key and search engine id before sending.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`SearchError`] when the request cannot be sent or when
+    /// `parse_response` rejects the response.
+    pub fn search(
+        &self,
+        mut request: GoogleSearchRequest,
+    ) -> Result<GoogleSearchResponse, SearchError> {
+        request.key = self.api_key.clone();
+        request.cx = self.search_engine_id.clone();
+
+        // Never write the API key to the log: trace a copy with the key masked,
+        // and only build that copy when trace logging is actually enabled.
+        if log::log_enabled!(log::Level::Trace) {
+            let redacted = GoogleSearchRequest {
+                key: "<redacted>".to_string(),
+                ..request.clone()
+            };
+            trace!("Sending request to Google Custom Search API: {redacted:?}");
+        }
+
+        let response: Response = self
+            .client
+            .request(Method::GET, BASE_URL)
+            .query(&request)
+            .send()
+            .map_err(|err| from_reqwest_error("Request failed", err))?;
+
+        parse_response(response)
+    }
+}
+
+#[derive(Debug, Serialize,
Deserialize, Clone)] +pub struct GoogleSearchRequest { + pub q: String, + pub cx: String, + pub key: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub num: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub start: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub safe: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub lr: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub gl: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "dateRestrict")] + pub date_restrict: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "siteSearch")] + pub site_search: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "siteSearchFilter")] + pub site_search_filter: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct GoogleSearchResponse { + #[serde(skip_serializing_if = "Option::is_none")] + pub items: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "searchInformation")] + pub search_information: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub queries: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct GoogleSearchItem { + pub title: String, + pub link: String, + pub snippet: String, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "displayLink")] + pub display_link: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "htmlSnippet")] + pub html_snippet: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "pagemap")] + pub pagemap: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct GoogleSearchInformation { + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "totalResults")] + pub total_results: Option, + #[serde(skip_serializing_if =
"Option::is_none")] + #[serde(rename = "searchTime")] + pub search_time: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct GoogleQueries { + #[serde(skip_serializing_if = "Option::is_none")] + pub request: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "nextPage")] + pub next_page: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct GoogleQueryInfo { + pub title: String, + #[serde(rename = "totalResults")] + pub total_results: Option, + #[serde(rename = "searchTerms")] + pub search_terms: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub count: Option, + #[serde(rename = "startIndex")] + pub start_index: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct GoogleError { + pub code: u32, + pub message: String, + pub status: String, +} +
+/// Maps a numeric error code (an HTTP status or the `error.code` field of a
+/// response body) to a [`SearchError`].
+///
+/// `detail` is only used for codes that have no dedicated variant.
+fn classify_error(code: u32, detail: String) -> SearchError {
+    match code {
+        // NOTE(review): 60 is a fixed value; no `Retry-After` header is read.
+        // Presumably the payload is a retry delay in seconds; confirm against the WIT.
+        429 => SearchError::RateLimited(60),
+        400 => SearchError::InvalidQuery,
+        _ => SearchError::BackendError(detail),
+    }
+}
+
+/// Turns an HTTP response from the Custom Search endpoint into a
+/// [`GoogleSearchResponse`].
+///
+/// # Errors
+///
+/// * Non-success HTTP status: 429 becomes `RateLimited`, 400 becomes
+///   `InvalidQuery`, anything else becomes `BackendError` carrying the status
+///   line and, when the body parses, the message of its `error` object.
+/// * Success status with an undecodable body: `BackendError`.
+/// * Success status whose body carries an `error` object: mapped by its
+///   `code` in the same way.
+fn parse_response(response: Response) -> Result<GoogleSearchResponse, SearchError> {
+    let status = response.status();
+    if !status.is_success() {
+        let http_detail = format!(
+            "HTTP {}: {}",
+            status,
+            status.canonical_reason().unwrap_or("Unknown error")
+        );
+        // Best effort: an error response may still carry a JSON body with an
+        // `error` object; keep its message instead of discarding it.
+        let detail = match response
+            .json::<GoogleSearchResponse>()
+            .ok()
+            .and_then(|body| body.error)
+        {
+            Some(error) => format!("{http_detail} ({})", error.message),
+            None => http_detail,
+        };
+        // Classify by HTTP status so rate limiting and bad requests are not
+        // flattened into a generic backend error.
+        return Err(classify_error(u32::from(status.as_u16()), detail));
+    }
+
+    let search_response: GoogleSearchResponse = response
+        .json()
+        .map_err(|e| SearchError::BackendError(format!("JSON parsing failed: {e}")))?;
+
+    if let Some(error) = &search_response.error {
+        return Err(classify_error(
+            error.code,
+            format!("Google API error: {}", error.message),
+        ));
+    }
+
+    Ok(search_response)
+}
diff --git a/web-search/google/src/conversions.rs b/web-search/google/src/conversions.rs new file mode 100644 index 000000000..963cc75fc --- /dev/null +++ b/web-search/google/src/conversions.rs @@ -0,0 +1,547 @@ +#[cfg(test)] +use crate::client::{GoogleQueries, GoogleQueryInfo, GoogleSearchInformation, GoogleSearchItem}; +use crate::client::{GoogleSearchRequest, GoogleSearchResponse}; +use golem_web_search::golem::web_search::types::{ +
SafeSearchLevel, SearchMetadata, SearchParams, SearchResult, TimeRange, +}; +
+/// Builds a [`GoogleSearchRequest`] from the provider-independent
+/// [`SearchParams`].
+///
+/// `cx` and `key` are left empty here. `start_index` is copied into `start`
+/// unchanged.
+///
+/// Domain filtering: a non-empty `include_domains` list wins. Otherwise a
+/// non-empty `exclude_domains` list is applied. Empty lists are treated as
+/// absent.
+pub fn convert_params_to_request(
+    params: &SearchParams,
+    start_index: Option<u32>,
+) -> GoogleSearchRequest {
+    let mut request = GoogleSearchRequest {
+        q: params.query.clone(),
+        cx: String::new(),
+        key: String::new(),
+        num: params.max_results,
+        start: start_index,
+        // NOTE(review): the Custom Search reference documents only `active`
+        // and `off` for `safe`; confirm `medium`/`high` are accepted.
+        safe: params.safe_search.as_ref().map(|s| match s {
+            SafeSearchLevel::Off => "off".to_string(),
+            SafeSearchLevel::Medium => "medium".to_string(),
+            SafeSearchLevel::High => "high".to_string(),
+        }),
+        lr: params
+            .language
+            .as_deref()
+            .and_then(language_code_to_google),
+        gl: params
+            .region
+            .as_deref()
+            .and_then(country_code_to_google),
+        date_restrict: params.time_range.as_ref().map(|tr| match tr {
+            TimeRange::Day => "d1".to_string(),
+            TimeRange::Week => "w1".to_string(),
+            TimeRange::Month => "m1".to_string(),
+            TimeRange::Year => "y1".to_string(),
+        }),
+        site_search: None,
+        site_search_filter: None,
+    };
+
+    // Pick the domain list and filter flag ("i" = include, "e" = exclude).
+    // An empty include list must not hide a populated exclude list.
+    let domain_filter = match (
+        params.include_domains.as_deref(),
+        params.exclude_domains.as_deref(),
+    ) {
+        (Some(include), _) if !include.is_empty() => Some((include, "i")),
+        (_, Some(exclude)) if !exclude.is_empty() => Some((exclude, "e")),
+        _ => None,
+    };
+
+    if let Some((domains, filter)) = domain_filter {
+        request.site_search = Some(
+            domains
+                .iter()
+                .map(|domain| format!("site:{domain}"))
+                .collect::<Vec<_>>()
+                .join(" OR "),
+        );
+        request.site_search_filter = Some(filter.to_string());
+    }
+
+    request
+}
+
+pub fn country_code_to_google(country_code: &str) -> Option { + match country_code.to_lowercase().as_str() { + "us" | "usa" | "united states" => Some("us".to_string()), + "uk" | "gb" | "united kingdom" => Some("uk".to_string()), + "ca" | "canada" => Some("ca".to_string()), + "au" | "australia" => Some("au".to_string()), + "de" | "germany" => Some("de".to_string()), + "fr" | "france" =>
Some("fr".to_string()), + "es" | "spain" => Some("es".to_string()), + "it" | "italy" => Some("it".to_string()), + "jp" | "japan" => Some("jp".to_string()), + "br" | "brazil" => Some("br".to_string()), + "in" | "india" => Some("in".to_string()), + "cn" | "china" => Some("cn".to_string()), + "ru" | "russia" => Some("ru".to_string()), + "mx" | "mexico" => Some("mx".to_string()), + "ar" | "argentina" => Some("ar".to_string()), + "cl" | "chile" => Some("cl".to_string()), + "co" | "colombia" => Some("co".to_string()), + "pe" | "peru" => Some("pe".to_string()), + "za" | "south africa" => Some("za".to_string()), + "ng" | "nigeria" => Some("ng".to_string()), + "eg" | "egypt" => Some("eg".to_string()), + "kr" | "south korea" => Some("kr".to_string()), + "th" | "thailand" => Some("th".to_string()), + "sg" | "singapore" => Some("sg".to_string()), + "my" | "malaysia" => Some("my".to_string()), + "id" | "indonesia" => Some("id".to_string()), + "ph" | "philippines" => Some("ph".to_string()), + "vn" | "vietnam" => Some("vn".to_string()), + "tw" | "taiwan" => Some("tw".to_string()), + "hk" | "hong kong" => Some("hk".to_string()), + "nl" | "netherlands" => Some("nl".to_string()), + "be" | "belgium" => Some("be".to_string()), + "ch" | "switzerland" => Some("ch".to_string()), + "at" | "austria" => Some("at".to_string()), + "se" | "sweden" => Some("se".to_string()), + "no" | "norway" => Some("no".to_string()), + "dk" | "denmark" => Some("dk".to_string()), + "fi" | "finland" => Some("fi".to_string()), + "pl" | "poland" => Some("pl".to_string()), + "cz" | "czech republic" => Some("cz".to_string()), + "hu" | "hungary" => Some("hu".to_string()), + "gr" | "greece" => Some("gr".to_string()), + "pt" | "portugal" => Some("pt".to_string()), + "tr" | "turkey" => Some("tr".to_string()), + "il" | "israel" => Some("il".to_string()), + "ae" | "uae" | "united arab emirates" => Some("ae".to_string()), + "sa" | "saudi arabia" => Some("sa".to_string()), + "nz" | "new zealand" => Some("nz".to_string()), 
+ _ => Some(country_code.to_lowercase()), + } +} + +pub fn language_code_to_google(language_code: &str) -> Option { + let input = language_code.to_lowercase(); + + if input.starts_with("lang_") { + return Some(language_code.to_string()); + } + + let lang_code = match input.as_str() { + "en" | "english" => "en", + "es" | "spanish" => "es", + "fr" | "french" => "fr", + "de" | "german" => "de", + "it" | "italian" => "it", + "pt" | "portuguese" => "pt", + "ru" | "russian" => "ru", + "zh" | "chinese" => "zh", + "ja" | "japanese" => "ja", + "ko" | "korean" => "ko", + "ar" | "arabic" => "ar", + "hi" | "hindi" => "hi", + "th" | "thai" => "th", + "vi" | "vietnamese" => "vi", + "id" | "indonesian" => "id", + "ms" | "malay" => "ms", + "tl" | "tagalog" => "tl", + "nl" | "dutch" => "nl", + "sv" | "swedish" => "sv", + "no" | "norwegian" => "no", + "da" | "danish" => "da", + "fi" | "finnish" => "fi", + "pl" | "polish" => "pl", + "cs" | "czech" => "cs", + "hu" | "hungarian" => "hu", + "el" | "greek" => "el", + "tr" | "turkish" => "tr", + "he" | "hebrew" => "he", + "fa" | "persian" => "fa", + "ur" | "urdu" => "ur", + "bn" | "bengali" => "bn", + "ta" | "tamil" => "ta", + "te" | "telugu" => "te", + "ml" | "malayalam" => "ml", + "kn" | "kannada" => "kn", + "gu" | "gujarati" => "gu", + "pa" | "punjabi" => "pa", + "mr" | "marathi" => "mr", + "ne" | "nepali" => "ne", + "si" | "sinhala" => "si", + "my" | "myanmar" => "my", + "km" | "khmer" => "km", + "lo" | "lao" => "lo", + "ka" | "georgian" => "ka", + "hy" | "armenian" => "hy", + "az" | "azerbaijani" => "az", + "kk" | "kazakh" => "kk", + "ky" | "kyrgyz" => "ky", + "mn" | "mongolian" => "mn", + "uz" | "uzbek" => "uz", + "uk" | "ukrainian" => "uk", + "bg" | "bulgarian" => "bg", + "hr" | "croatian" => "hr", + "sr" | "serbian" => "sr", + "bs" | "bosnian" => "bs", + "mk" | "macedonian" => "mk", + "sl" | "slovenian" => "sl", + "sk" | "slovak" => "sk", + "ro" | "romanian" => "ro", + "lv" | "latvian" => "lv", + "lt" | "lithuanian" => "lt", + 
"et" | "estonian" => "et", + "mt" | "maltese" => "mt", + "is" | "icelandic" => "is", + "ga" | "irish" => "ga", + "cy" | "welsh" => "cy", + "eu" | "basque" => "eu", + "ca" | "catalan" => "ca", + "gl" | "galician" => "gl", + "af" | "afrikaans" => "af", + "sw" | "swahili" => "sw", + "am" | "amharic" => "am", + "or" | "oriya" => "or", + "as" | "assamese" => "as", + "sd" | "sindhi" => "sd", + "ps" | "pashto" => "ps", + "tg" | "tajik" => "tg", + "tk" | "turkmen" => "tk", + _ => &input, + }; + Some(format!("lang_{lang_code}")) +} + +pub fn convert_response_to_results( + response: GoogleSearchResponse, + params: &SearchParams, +) -> (Vec, Option) { + let results = if let Some(items) = response.items { + items + .into_iter() + .map(|item| SearchResult { + title: item.title, + url: item.link, + snippet: item.snippet, + display_url: item.display_link, + source: Some("Google".to_string()), + score: None, + html_snippet: if params.include_html.unwrap_or(false) { + item.html_snippet + } else { + None + }, + date_published: None, + images: None, + content_chunks: None, + }) + .collect() + } else { + Vec::new() + }; + + let metadata = SearchMetadata { + query: params.query.clone(), + total_results: response + .search_information + .as_ref() + .and_then(|info| info.total_results.as_ref()) + .and_then(|s| s.parse::().ok()), + search_time_ms: response + .search_information + .as_ref() + .and_then(|info| info.search_time) + .map(|t| t * 1000.0), + safe_search: params.safe_search, + language: params.language.clone(), + region: params.region.clone(), + next_page_token: response + .queries + .as_ref() + .and_then(|q| q.next_page.as_ref()) + .and_then(|np| np.first()) + .and_then(|np| np.start_index) + .map(|idx| idx.to_string()), + rate_limits: None, + }; + + (results, Some(metadata)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_params() -> SearchParams { + SearchParams { + query: "test query".to_string(), + safe_search: Some(SafeSearchLevel::Medium), + language: 
Some("en".to_string()), + region: Some("us".to_string()), + max_results: Some(10), + time_range: Some(TimeRange::Week), + include_domains: Some(vec!["example.com".to_string(), "test.org".to_string()]), + exclude_domains: None, + include_images: Some(true), + include_html: Some(true), + advanced_answer: Some(false), + } + } + + fn create_test_response() -> GoogleSearchResponse { + GoogleSearchResponse { + items: Some(vec![ + GoogleSearchItem { + title: "Test Result 1".to_string(), + link: "https://example.com/1".to_string(), + snippet: "This is a test snippet 1".to_string(), + display_link: Some("example.com".to_string()), + html_snippet: Some("Test snippet 1".to_string()), + pagemap: None, + }, + GoogleSearchItem { + title: "Test Result 2".to_string(), + link: "https://test.org/2".to_string(), + snippet: "This is a test snippet 2".to_string(), + display_link: Some("test.org".to_string()), + html_snippet: Some("Test snippet 2".to_string()), + pagemap: None, + }, + ]), + search_information: Some(GoogleSearchInformation { + total_results: Some("1000".to_string()), + search_time: Some(0.15), + }), + queries: Some(GoogleQueries { + request: None, + next_page: Some(vec![GoogleQueryInfo { + title: "Next Page".to_string(), + total_results: Some("1000".to_string()), + search_terms: "test query".to_string(), + count: Some(10), + start_index: Some(11), + }]), + }), + error: None, + } + } + + #[test] + fn test_convert_params_to_request_basic() { + let params = SearchParams { + query: "basic test".to_string(), + safe_search: None, + language: None, + region: None, + max_results: None, + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + + assert_eq!(request.q, "basic test"); + assert_eq!(request.cx, ""); + assert_eq!(request.key, ""); + assert_eq!(request.num, None); + assert_eq!(request.start, None); + assert_eq!(request.safe, 
None); + assert_eq!(request.lr, None); + assert_eq!(request.gl, None); + assert_eq!(request.date_restrict, None); + assert_eq!(request.site_search, None); + assert_eq!(request.site_search_filter, None); + } + + #[test] + fn test_convert_params_to_request_full() { + let params = create_test_params(); + let request = convert_params_to_request(¶ms, Some(21)); + + assert_eq!(request.q, "test query"); + assert_eq!(request.num, Some(10)); + assert_eq!(request.start, Some(21)); + assert_eq!(request.safe, Some("medium".to_string())); + assert_eq!(request.lr, Some("lang_en".to_string())); + assert_eq!(request.gl, Some("us".to_string())); + assert_eq!(request.date_restrict, Some("w1".to_string())); + assert_eq!( + request.site_search, + Some("site:example.com OR site:test.org".to_string()) + ); + assert_eq!(request.site_search_filter, Some("i".to_string())); + } + + #[test] + fn test_convert_params_safe_search_levels() { + let test_cases = vec![ + (SafeSearchLevel::Off, "off"), + (SafeSearchLevel::Medium, "medium"), + (SafeSearchLevel::High, "high"), + ]; + + for (level, expected) in test_cases { + let params = SearchParams { + query: "test".to_string(), + safe_search: Some(level), + language: None, + region: None, + max_results: None, + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + assert_eq!(request.safe, Some(expected.to_string())); + } + } + + #[test] + fn test_convert_params_time_ranges() { + let test_cases = vec![ + (TimeRange::Day, "d1"), + (TimeRange::Week, "w1"), + (TimeRange::Month, "m1"), + (TimeRange::Year, "y1"), + ]; + + for (range, expected) in test_cases { + let params = SearchParams { + query: "test".to_string(), + safe_search: None, + language: None, + region: None, + max_results: None, + time_range: Some(range), + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: 
None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + assert_eq!(request.date_restrict, Some(expected.to_string())); + } + } + + #[test] + fn test_convert_params_exclude_domains() { + let params = SearchParams { + query: "test".to_string(), + safe_search: None, + language: None, + region: None, + max_results: None, + time_range: None, + include_domains: None, + exclude_domains: Some(vec!["spam.com".to_string(), "bad.org".to_string()]), + include_images: None, + include_html: None, + advanced_answer: None, + }; + + let request = convert_params_to_request(¶ms, None); + assert_eq!( + request.site_search, + Some("site:spam.com OR site:bad.org".to_string()) + ); + assert_eq!(request.site_search_filter, Some("e".to_string())); + } + + #[test] + fn test_convert_response_to_results_basic() { + let params = create_test_params(); + let response = create_test_response(); + + let (results, metadata) = convert_response_to_results(response, ¶ms); + + assert_eq!(results.len(), 2); + + assert_eq!(results[0].title, "Test Result 1"); + assert_eq!(results[0].url, "https://example.com/1"); + assert_eq!(results[0].snippet, "This is a test snippet 1"); + assert_eq!(results[0].display_url, Some("example.com".to_string())); + assert_eq!(results[0].source, Some("Google".to_string())); + assert_eq!(results[0].score, None); + assert_eq!( + results[0].html_snippet, + Some("Test snippet 1".to_string()) + ); + assert_eq!(results[0].date_published, None); + assert_eq!(results[0].images, None); + assert_eq!(results[0].content_chunks, None); + + assert_eq!(results[1].title, "Test Result 2"); + assert_eq!(results[1].url, "https://test.org/2"); + assert_eq!(results[1].snippet, "This is a test snippet 2"); + assert_eq!(results[1].display_url, Some("test.org".to_string())); + + assert!(metadata.is_some()); + let meta = metadata.unwrap(); + assert_eq!(meta.query, "test query"); + assert_eq!(meta.total_results, Some(1000)); + assert_eq!(meta.search_time_ms, 
Some(150.0)); // 0.15 * 1000 + assert_eq!(meta.safe_search, Some(SafeSearchLevel::Medium)); + assert_eq!(meta.language, Some("en".to_string())); + assert_eq!(meta.region, Some("us".to_string())); + assert_eq!(meta.next_page_token, Some("11".to_string())); + assert_eq!(meta.rate_limits, None); + } + + #[test] + fn test_convert_response_to_results_no_html() { + let mut params = create_test_params(); + params.include_html = Some(false); + let response = create_test_response(); + + let (results, _) = convert_response_to_results(response, ¶ms); + + assert_eq!(results.len(), 2); + assert_eq!(results[0].html_snippet, None); + assert_eq!(results[1].html_snippet, None); + } + + #[test] + fn test_convert_response_to_results_empty() { + let params = create_test_params(); + let response = GoogleSearchResponse { + items: None, + search_information: None, + queries: None, + error: None, + }; + + let (results, metadata) = convert_response_to_results(response, ¶ms); + + assert_eq!(results.len(), 0); + assert!(metadata.is_some()); + let meta = metadata.unwrap(); + assert_eq!(meta.query, "test query"); + assert_eq!(meta.total_results, None); + assert_eq!(meta.search_time_ms, None); + assert_eq!(meta.next_page_token, None); + } + + #[test] + fn test_convert_response_malformed_total_results() { + let params = create_test_params(); + let mut response = create_test_response(); + response.search_information = Some(GoogleSearchInformation { + total_results: Some("not_a_number".to_string()), + search_time: Some(0.25), + }); + + let (_, metadata) = convert_response_to_results(response, ¶ms); + + let meta = metadata.unwrap(); + assert_eq!(meta.total_results, None); + assert_eq!(meta.search_time_ms, Some(250.0)); + } +} diff --git a/web-search/google/src/lib.rs b/web-search/google/src/lib.rs new file mode 100644 index 000000000..6511b8030 --- /dev/null +++ b/web-search/google/src/lib.rs @@ -0,0 +1,122 @@ +use crate::client::GoogleSearchApi; +use crate::conversions::{convert_params_to_request, 
convert_response_to_results}; +use golem_web_search::config::with_config_key; +use golem_web_search::durability::DurableWebSearch; +use golem_web_search::golem::web_search::types::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; +use golem_web_search::golem_web_search::web_search::web_search::{ + Guest, GuestSearchSession, SearchSession, +}; +use golem_web_search::LOGGING_STATE; +use std::cell::RefCell; + +mod client; +mod conversions; + +struct GoogleWebSearchComponent; + +impl GoogleWebSearchComponent { + const API_KEY_ENV_VAR: &'static str = "GOOGLE_API_KEY"; + const SEARCH_ENGINE_ID_ENV_VAR: &'static str = "GOOGLE_SEARCH_ENGINE_ID"; +} + +pub struct GoogleSearchSession { + client: GoogleSearchApi, + params: SearchParams, + current_start_index: RefCell, + last_metadata: RefCell>, + has_more_results: RefCell, +} + +impl GoogleSearchSession { + fn new(client: GoogleSearchApi, params: SearchParams) -> Self { + Self { + client, + params, + current_start_index: RefCell::new(0), + last_metadata: RefCell::new(None), + has_more_results: RefCell::new(true), + } + } +} + +impl GuestSearchSession for GoogleSearchSession { + fn next_page(&self) -> Result { + if !*self.has_more_results.borrow_mut() { + return Err(SearchError::BackendError( + "No more results available".to_string(), + )); + } + *self.current_start_index.borrow_mut() = *self.current_start_index.borrow_mut() + 1_u32; + let request = + convert_params_to_request(&self.params, Some(*self.current_start_index.borrow())); + let response = self.client.search(request)?; + let (results, metadata) = convert_response_to_results(response, &self.params); + + *self.last_metadata.borrow_mut() = metadata.clone(); + + if results.is_empty() { + *self.has_more_results.borrow_mut() = false; + return Err(SearchError::BackendError("No more results".to_string())); + } + + if let Some(metadata) = &metadata { + *self.has_more_results.borrow_mut() = metadata.next_page_token.is_some(); + } else { + 
*self.has_more_results.borrow_mut() = false; + } + + results + .into_iter() + .next() + .ok_or_else(|| SearchError::BackendError("No results returned".to_string())) + } + + fn get_metadata(&self) -> Option { + self.last_metadata.borrow().clone() + } +} + +impl Guest for GoogleWebSearchComponent { + type SearchSession = GoogleSearchSession; + + fn start_search(params: SearchParams) -> Result { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + with_config_key( + &[Self::API_KEY_ENV_VAR, Self::SEARCH_ENGINE_ID_ENV_VAR], + Err, + |keys| { + let api_key = keys.get(Self::API_KEY_ENV_VAR).unwrap().to_owned(); + let search_engine_id = keys.get(Self::SEARCH_ENGINE_ID_ENV_VAR).unwrap().to_owned(); + let client = GoogleSearchApi::new(api_key, search_engine_id); + Ok(SearchSession::new(GoogleSearchSession::new(client, params))) + }, + ) + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec, Option), SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + with_config_key( + &[Self::API_KEY_ENV_VAR, Self::SEARCH_ENGINE_ID_ENV_VAR], + Err, + |keys| { + let api_key = keys.get(Self::API_KEY_ENV_VAR).unwrap().to_owned(); + let search_engine_id = keys.get(Self::SEARCH_ENGINE_ID_ENV_VAR).unwrap().to_owned(); + let client = GoogleSearchApi::new(api_key, search_engine_id); + let request = convert_params_to_request(¶ms, None); + let response = client.search(request)?; + let (results, metadata) = convert_response_to_results(response, ¶ms); + Ok((results, metadata)) + }, + ) + } +} + +type DurableGoogleWebSearchComponent = DurableWebSearch; + +golem_web_search::export_web_search!(DurableGoogleWebSearchComponent with_types_in golem_web_search); diff --git a/web-search/google/wit/deps/golem-web-search/golem-web-search.wit b/web-search/google/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/web-search/google/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package 
golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that 
returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/google/wit/deps/wasi:io/error.wit b/web-search/google/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/google/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. 
+ @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/google/wit/deps/wasi:io/poll.wit b/web-search/google/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/google/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. 
+ @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/google/wit/deps/wasi:io/streams.wit b/web-search/google/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/google/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. 
When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. 
+ len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. 
+ /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. 
the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/google/wit/deps/wasi:io/world.wit b/web-search/google/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/google/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/google/wit/google.wit b/web-search/google/wit/google.wit new file mode 100644 index 000000000..7aa5dd344 --- /dev/null +++ b/web-search/google/wit/google.wit @@ -0,0 +1,5 @@ +package golem:web-search-google@1.0.0; + +world web-search-library { + include golem:web-search/web-search-library@1.0.0; +} diff --git a/web-search/serper/Cargo.toml b/web-search/serper/Cargo.toml new file mode 100644 index 000000000..2c5e90c9e --- /dev/null +++ b/web-search/serper/Cargo.toml @@ -0,0 +1,43 @@ +[package] +name = "golem-websearch-serper" +version = "0.0.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for working with Serper Search API, with special support for Golem Cloud" + +[lib] +path = "src/lib.rs" +crate-type = ["cdylib"] + +[features] +default = ["durability"] +durability = ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../web-search", version = "0.0.0", default-features = false } + +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } + +[package.metadata.component] +package = "golem:websearch-serper" + +[package.metadata.component.bindings] 
+generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:web-search" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io"} diff --git a/web-search/serper/src/bindings.rs b/web-search/serper/src/bindings.rs new file mode 100644 index 000000000..431fa2fd0 --- /dev/null +++ b/web-search/serper/src/bindings.rs @@ -0,0 +1,318 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! +// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" +// * generate_unused_types +use golem_web_search::golem::web_search::web_search as __with_name0; +#[rustfmt::skip] +#[allow(dead_code, clippy::all)] +pub mod golem { + pub mod web_search { + #[allow(dead_code, async_fn_in_trait, unused_imports, clippy::all)] + pub mod types { + #[used] + #[doc(hidden)] + static __FORCE_SECTION_REF: fn() = super::super::super::__link_custom_section_describing_imports; + use super::super::super::_rt; + /// Optional image-related result data + #[derive(Clone)] + pub struct ImageResult { + pub url: _rt::String, + pub description: Option<_rt::String>, + } + impl ::core::fmt::Debug for ImageResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("ImageResult") + .field("url", &self.url) + .field("description", &self.description) + .finish() + } + } + /// Core structure for a single search result + #[derive(Clone)] + pub struct SearchResult { + pub title: _rt::String, + pub url: _rt::String, + pub snippet: _rt::String, + pub display_url: Option<_rt::String>, + pub source: Option<_rt::String>, + pub score: Option, + pub html_snippet: Option<_rt::String>, + pub date_published: Option<_rt::String>, + pub images: 
Option<_rt::Vec>, + pub content_chunks: Option<_rt::Vec<_rt::String>>, + } + impl ::core::fmt::Debug for SearchResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchResult") + .field("title", &self.title) + .field("url", &self.url) + .field("snippet", &self.snippet) + .field("display-url", &self.display_url) + .field("source", &self.source) + .field("score", &self.score) + .field("html-snippet", &self.html_snippet) + .field("date-published", &self.date_published) + .field("images", &self.images) + .field("content-chunks", &self.content_chunks) + .finish() + } + } + /// Safe search settings + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum SafeSearchLevel { + Off, + Medium, + High, + } + impl ::core::fmt::Debug for SafeSearchLevel { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SafeSearchLevel::Off => { + f.debug_tuple("SafeSearchLevel::Off").finish() + } + SafeSearchLevel::Medium => { + f.debug_tuple("SafeSearchLevel::Medium").finish() + } + SafeSearchLevel::High => { + f.debug_tuple("SafeSearchLevel::High").finish() + } + } + } + } + impl SafeSearchLevel { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> SafeSearchLevel { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => SafeSearchLevel::Off, + 1 => SafeSearchLevel::Medium, + 2 => SafeSearchLevel::High, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Rate limiting metadata + #[repr(C)] + #[derive(Clone, Copy)] + pub struct RateLimitInfo { + pub limit: u32, + pub remaining: u32, + pub reset_timestamp: u64, + } + impl ::core::fmt::Debug for RateLimitInfo { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("RateLimitInfo") + .field("limit", &self.limit) + .field("remaining", &self.remaining) + .field("reset-timestamp", &self.reset_timestamp) + .finish() + 
} + } + /// Optional metadata for a search session + #[derive(Clone)] + pub struct SearchMetadata { + pub query: _rt::String, + pub total_results: Option, + pub search_time_ms: Option, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: Option<_rt::String>, + pub next_page_token: Option<_rt::String>, + pub rate_limits: Option, + } + impl ::core::fmt::Debug for SearchMetadata { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchMetadata") + .field("query", &self.query) + .field("total-results", &self.total_results) + .field("search-time-ms", &self.search_time_ms) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("next-page-token", &self.next_page_token) + .field("rate-limits", &self.rate_limits) + .finish() + } + } + /// Supported time range filtering + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum TimeRange { + Day, + Week, + Month, + Year, + } + impl ::core::fmt::Debug for TimeRange { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + TimeRange::Day => f.debug_tuple("TimeRange::Day").finish(), + TimeRange::Week => f.debug_tuple("TimeRange::Week").finish(), + TimeRange::Month => f.debug_tuple("TimeRange::Month").finish(), + TimeRange::Year => f.debug_tuple("TimeRange::Year").finish(), + } + } + } + impl TimeRange { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> TimeRange { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => TimeRange::Day, + 1 => TimeRange::Week, + 2 => TimeRange::Month, + 3 => TimeRange::Year, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Query parameters accepted by the unified search API + #[derive(Clone)] + pub struct SearchParams { + pub query: _rt::String, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: 
Option<_rt::String>, + pub max_results: Option, + pub time_range: Option, + pub include_domains: Option<_rt::Vec<_rt::String>>, + pub exclude_domains: Option<_rt::Vec<_rt::String>>, + pub include_images: Option, + pub include_html: Option, + pub advanced_answer: Option, + } + impl ::core::fmt::Debug for SearchParams { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchParams") + .field("query", &self.query) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("max-results", &self.max_results) + .field("time-range", &self.time_range) + .field("include-domains", &self.include_domains) + .field("exclude-domains", &self.exclude_domains) + .field("include-images", &self.include_images) + .field("include-html", &self.include_html) + .field("advanced-answer", &self.advanced_answer) + .finish() + } + } + /// Structured search error + #[derive(Clone)] + pub enum SearchError { + InvalidQuery, + RateLimited(u32), + UnsupportedFeature(_rt::String), + BackendError(_rt::String), + } + impl ::core::fmt::Debug for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SearchError::InvalidQuery => { + f.debug_tuple("SearchError::InvalidQuery").finish() + } + SearchError::RateLimited(e) => { + f.debug_tuple("SearchError::RateLimited").field(e).finish() + } + SearchError::UnsupportedFeature(e) => { + f.debug_tuple("SearchError::UnsupportedFeature") + .field(e) + .finish() + } + SearchError::BackendError(e) => { + f.debug_tuple("SearchError::BackendError").field(e).finish() + } + } + } + } + impl ::core::fmt::Display for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + write!(f, "{:?}", self) + } + } + impl std::error::Error for SearchError {} + } + } +} +#[rustfmt::skip] +mod _rt { + #![allow(dead_code, clippy::all)] + pub use 
alloc_crate::string::String; + pub use alloc_crate::vec::Vec; + extern crate alloc as alloc_crate; +} +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-serper@1.0.0:web-search-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1377] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xd8\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x08\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05month\x04year\x04\ +\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05querys\x0bsafe-se\ +arch\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-range\x16\x0finclu\ +de-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0cinclude-html\x17\x0f\ +advanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\x0dinvalid-query\0\ +\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dbackend-error\x01s\0\x04\ +\0\x0csearch-error\x03\0\x1a\x03\0\x1cgolem:web-search/types@1.0.0\x05\0\x02\x03\ +\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\0\0\x0fsearch-metadat\ +a\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\x04\0\x0dsearch-param\ +s\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\x02\x02\x03\x02\x01\x03\ 
+\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\0\x0csearch-error\x03\ +\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01j\x01\x03\x01\x07\x01@\x01\x04\ +self\x09\0\x0a\x04\0\x20[method]search-session.next-page\x01\x0b\x01k\x05\x01@\x01\ +\x04self\x09\0\x0c\x04\0#[method]search-session.get-metadata\x01\x0d\x01i\x08\x01\ +j\x01\x0e\x01\x07\x01@\x01\x06params\x01\0\x0f\x04\0\x0cstart-search\x01\x10\x01\ +p\x03\x01o\x02\x11\x0c\x01j\x01\x12\x01\x07\x01@\x01\x06params\x01\0\x13\x04\0\x0b\ +search-once\x01\x14\x04\0!golem:web-search/web-search@1.0.0\x05\x05\x04\00golem:\ +web-search-serper/web-search-library@1.0.0\x04\0\x0b\x18\x01\0\x12web-search-lib\ +rary\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\ +\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/web-search/serper/src/client.rs b/web-search/serper/src/client.rs new file mode 100644 index 000000000..0f35f2dc5 --- /dev/null +++ b/web-search/serper/src/client.rs @@ -0,0 +1,289 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::types::SearchError; +use log::trace; +use reqwest::{header, Client, Method, Response, StatusCode}; +use serde::{Deserialize, Serialize}; + +const BASE_URL: &str = "https://google.serper.dev/search"; + +pub struct SerperSearchApi { + api_key: String, + client: Client, +} + +impl SerperSearchApi { + pub fn new(api_key: String) -> Self { + let client = Client::builder() + .build() + .expect("Failed to initialize HTTP client"); + Self { api_key, client } + } + + pub fn search( + &self, + request: SerperSearchRequest, + ) -> Result { + trace!("Sending request to Serper Search API: {request:?}"); + + let response: Response = self + .client + .request(Method::POST, BASE_URL) + .header(header::ACCEPT, "application/json") + .header(header::CONTENT_TYPE, "application/json") + 
.header(header::USER_AGENT, "golem-websearch/1.0") + .header("X-API-KEY", &self.api_key) + .json(&[request]) // Serper expects an array of requests + .send() + .map_err(|err| from_reqwest_error("Request failed", err))?; + + parse_response(response) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperSearchRequest { + pub q: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub location: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub gl: Option, // Country code + #[serde(skip_serializing_if = "Option::is_none")] + pub hl: Option, // Language code + #[serde(skip_serializing_if = "Option::is_none")] + pub num: Option, // Number of results + #[serde(skip_serializing_if = "Option::is_none")] + pub autocorrect: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub tbs: Option, // Time-based search filters + #[serde(skip_serializing_if = "Option::is_none")] + pub page: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperSearchResponse { + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "searchParameters")] + pub search_parameters: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub organic: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "answerBox")] + pub answer_box: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "knowledgeGraph")] + pub knowledge_graph: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "peopleAlsoAsk")] + pub people_also_ask: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "relatedSearches")] + pub related_searches: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub images: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub videos: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub news: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + 
#[serde(rename = "searchInformation")] + pub search_information: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperSearchParameters { + pub q: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub gl: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub hl: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub num: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub autocorrect: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub page: Option, + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub search_type: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperOrganicResult { + pub title: String, + pub link: String, + pub snippet: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub position: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub date: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub sitelinks: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperSitelink { + pub title: String, + pub link: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperAnswerBox { + pub title: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub answer: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub link: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub snippet: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperKnowledgeGraph { + pub title: String, + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub kg_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub website: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "imageUrl")] + pub image_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(skip_serializing_if = 
"Option::is_none")] + #[serde(rename = "descriptionSource")] + pub description_source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "descriptionLink")] + pub description_link: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub attributes: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperPeopleAlsoAsk { + pub question: String, + pub snippet: String, + pub title: String, + pub link: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperRelatedSearch { + pub query: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperImage { + pub title: String, + #[serde(rename = "imageUrl")] + pub image_url: String, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "imageWidth")] + pub image_width: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "imageHeight")] + pub image_height: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "thumbnailUrl")] + pub thumbnail_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "thumbnailWidth")] + pub thumbnail_width: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "thumbnailHeight")] + pub thumbnail_height: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub domain: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub link: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub position: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperVideo { + pub title: String, + pub link: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub snippet: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "imageUrl")] + pub image_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub duration: 
Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub channel: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub date: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub position: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperNews { + pub title: String, + pub link: String, + pub snippet: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub date: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "imageUrl")] + pub image_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub position: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SerperSearchInformation { + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "totalResults")] + pub total_results: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "timeTaken")] + pub time_taken: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "originalQuery")] + pub original_query: Option, +} + +fn parse_response(response: Response) -> Result { + match response.status() { + StatusCode::OK => { + let body = response.text().map_err(|e| { + SearchError::BackendError(format!("Failed to read response body: {e}")) + })?; + + // Serper returns an array of responses when we send an array, we take the first one + let parsed_array = + serde_json::from_str::>(&body).map_err(|e| { + SearchError::BackendError(format!( + "Failed to parse response as array: {e} \nRaw body: {body}" + )) + })?; + + parsed_array + .into_iter() + .next() + .ok_or_else(|| SearchError::BackendError("Empty response array".to_string())) + } + StatusCode::TOO_MANY_REQUESTS => Err(SearchError::RateLimited(60)), + StatusCode::BAD_REQUEST => Err(SearchError::InvalidQuery), + StatusCode::UNAUTHORIZED 
=> Err(SearchError::BackendError("Invalid API key".to_string())), + _ => { + let status = response.status(); + let body = response + .text() + .unwrap_or_else(|_| "".into()); + Err(SearchError::BackendError(format!( + "Request failed: {status} \nRaw body: {body}" + ))) + } + } +} diff --git a/web-search/serper/src/conversions.rs b/web-search/serper/src/conversions.rs new file mode 100644 index 000000000..fd6147a5e --- /dev/null +++ b/web-search/serper/src/conversions.rs @@ -0,0 +1,280 @@ +use crate::client::{SerperSearchRequest, SerperSearchResponse}; +use golem_web_search::golem::web_search::types::{ + ImageResult, SearchMetadata, SearchParams, SearchResult, TimeRange, +}; + +pub fn convert_params_to_request(params: &SearchParams, page: Option) -> SerperSearchRequest { + let num = params.max_results.unwrap_or(10).min(100); + + SerperSearchRequest { + q: params.query.clone(), + location: params.region.clone(), + gl: country_code_to_serper(params.region.as_ref().unwrap_or(&"us".to_string())), + hl: language_code_to_serper(params.language.as_ref().unwrap_or(&"en".to_string())), + num: Some(num), + autocorrect: Some(true), + tbs: params.time_range.as_ref().map(|tr| match tr { + TimeRange::Day => "d".to_string(), + TimeRange::Week => "w".to_string(), + TimeRange::Month => "m".to_string(), + TimeRange::Year => "y".to_string(), + }), + page, + } +} + +pub fn country_code_to_serper(country_code: &str) -> Option { + match country_code.to_lowercase().as_str() { + "us" | "usa" | "united states" => Some("us".to_string()), + "uk" | "gb" | "united kingdom" => Some("gb".to_string()), + "ca" | "canada" => Some("ca".to_string()), + "au" | "australia" => Some("au".to_string()), + "de" | "germany" => Some("de".to_string()), + "fr" | "france" => Some("fr".to_string()), + "es" | "spain" => Some("es".to_string()), + "it" | "italy" => Some("it".to_string()), + "jp" | "japan" => Some("jp".to_string()), + "br" | "brazil" => Some("br".to_string()), + "in" | "india" => 
Some("in".to_string()), + "cn" | "china" => Some("cn".to_string()), + "ru" | "russia" => Some("ru".to_string()), + "mx" | "mexico" => Some("mx".to_string()), + "ar" | "argentina" => Some("ar".to_string()), + "cl" | "chile" => Some("cl".to_string()), + "co" | "colombia" => Some("co".to_string()), + "pe" | "peru" => Some("pe".to_string()), + "za" | "south africa" => Some("za".to_string()), + "ng" | "nigeria" => Some("ng".to_string()), + "eg" | "egypt" => Some("eg".to_string()), + "kr" | "south korea" => Some("kr".to_string()), + "th" | "thailand" => Some("th".to_string()), + "sg" | "singapore" => Some("sg".to_string()), + "my" | "malaysia" => Some("my".to_string()), + "id" | "indonesia" => Some("id".to_string()), + "ph" | "philippines" => Some("ph".to_string()), + "vn" | "vietnam" => Some("vn".to_string()), + "tw" | "taiwan" => Some("tw".to_string()), + "hk" | "hong kong" => Some("hk".to_string()), + "nl" | "netherlands" => Some("nl".to_string()), + "be" | "belgium" => Some("be".to_string()), + "ch" | "switzerland" => Some("ch".to_string()), + "at" | "austria" => Some("at".to_string()), + "se" | "sweden" => Some("se".to_string()), + "no" | "norway" => Some("no".to_string()), + "dk" | "denmark" => Some("dk".to_string()), + "fi" | "finland" => Some("fi".to_string()), + "pl" | "poland" => Some("pl".to_string()), + "cz" | "czech republic" => Some("cz".to_string()), + "hu" | "hungary" => Some("hu".to_string()), + "gr" | "greece" => Some("gr".to_string()), + "pt" | "portugal" => Some("pt".to_string()), + "tr" | "turkey" => Some("tr".to_string()), + "il" | "israel" => Some("il".to_string()), + "ae" | "uae" | "united arab emirates" => Some("ae".to_string()), + "sa" | "saudi arabia" => Some("sa".to_string()), + "nz" | "new zealand" => Some("nz".to_string()), + _ => Some(country_code.to_lowercase()), + } +} + +pub fn language_code_to_serper(language_code: &str) -> Option { + let input = language_code.to_lowercase(); + + let lang_code = if input.starts_with("lang_") { + 
input.strip_prefix("lang_").unwrap_or(&input) + } else { + &input + }; + + match lang_code { + "en" | "english" => Some("en".to_string()), + "es" | "spanish" => Some("es".to_string()), + "fr" | "french" => Some("fr".to_string()), + "de" | "german" => Some("de".to_string()), + "it" | "italian" => Some("it".to_string()), + "pt" | "portuguese" => Some("pt".to_string()), + "ru" | "russian" => Some("ru".to_string()), + "zh" | "chinese" => Some("zh".to_string()), + "ja" | "japanese" => Some("ja".to_string()), + "ko" | "korean" => Some("ko".to_string()), + "ar" | "arabic" => Some("ar".to_string()), + "hi" | "hindi" => Some("hi".to_string()), + "th" | "thai" => Some("th".to_string()), + "vi" | "vietnamese" => Some("vi".to_string()), + "id" | "indonesian" => Some("id".to_string()), + "ms" | "malay" => Some("ms".to_string()), + "tl" | "tagalog" => Some("tl".to_string()), + "nl" | "dutch" => Some("nl".to_string()), + "sv" | "swedish" => Some("sv".to_string()), + "no" | "norwegian" => Some("no".to_string()), + "da" | "danish" => Some("da".to_string()), + "fi" | "finnish" => Some("fi".to_string()), + "pl" | "polish" => Some("pl".to_string()), + "cs" | "czech" => Some("cs".to_string()), + "hu" | "hungarian" => Some("hu".to_string()), + "el" | "greek" => Some("el".to_string()), + "tr" | "turkish" => Some("tr".to_string()), + "he" | "hebrew" => Some("he".to_string()), + "fa" | "persian" => Some("fa".to_string()), + "ur" | "urdu" => Some("ur".to_string()), + "bn" | "bengali" => Some("bn".to_string()), + "ta" | "tamil" => Some("ta".to_string()), + "te" | "telugu" => Some("te".to_string()), + "ml" | "malayalam" => Some("ml".to_string()), + "kn" | "kannada" => Some("kn".to_string()), + "gu" | "gujarati" => Some("gu".to_string()), + "pa" | "punjabi" => Some("pa".to_string()), + "mr" | "marathi" => Some("mr".to_string()), + "ne" | "nepali" => Some("ne".to_string()), + "si" | "sinhala" => Some("si".to_string()), + "my" | "myanmar" => Some("my".to_string()), + "km" | "khmer" => 
Some("km".to_string()), + "lo" | "lao" => Some("lo".to_string()), + "ka" | "georgian" => Some("ka".to_string()), + "hy" | "armenian" => Some("hy".to_string()), + "az" | "azerbaijani" => Some("az".to_string()), + "kk" | "kazakh" => Some("kk".to_string()), + "ky" | "kyrgyz" => Some("ky".to_string()), + "mn" | "mongolian" => Some("mn".to_string()), + "uz" | "uzbek" => Some("uz".to_string()), + "uk" | "ukrainian" => Some("uk".to_string()), + "bg" | "bulgarian" => Some("bg".to_string()), + "hr" | "croatian" => Some("hr".to_string()), + "sr" | "serbian" => Some("sr".to_string()), + "bs" | "bosnian" => Some("bs".to_string()), + "mk" | "macedonian" => Some("mk".to_string()), + "sl" | "slovenian" => Some("sl".to_string()), + "sk" | "slovak" => Some("sk".to_string()), + "ro" | "romanian" => Some("ro".to_string()), + "lv" | "latvian" => Some("lv".to_string()), + "lt" | "lithuanian" => Some("lt".to_string()), + "et" | "estonian" => Some("et".to_string()), + "mt" | "maltese" => Some("mt".to_string()), + "is" | "icelandic" => Some("is".to_string()), + "ga" | "irish" => Some("ga".to_string()), + "cy" | "welsh" => Some("cy".to_string()), + "eu" | "basque" => Some("eu".to_string()), + "ca" | "catalan" => Some("ca".to_string()), + "gl" | "galician" => Some("gl".to_string()), + "af" | "afrikaans" => Some("af".to_string()), + "sw" | "swahili" => Some("sw".to_string()), + "am" | "amharic" => Some("am".to_string()), + "or" | "oriya" => Some("or".to_string()), + "as" | "assamese" => Some("as".to_string()), + "sd" | "sindhi" => Some("sd".to_string()), + "ps" | "pashto" => Some("ps".to_string()), + "tg" | "tajik" => Some("tg".to_string()), + "tk" | "turkmen" => Some("tk".to_string()), + _ => Some(lang_code.to_string()), + } +} + +pub fn convert_response_to_results( + response: SerperSearchResponse, + params: &SearchParams, +) -> (Vec, Option) { + let mut search_results = Vec::new(); + + if let Some(organic_results) = response.organic { + for result in organic_results { + let images = 
response.images.as_ref().and_then(|imgs| { + if !imgs.is_empty() { + Some( + imgs.iter() + .map(|img| ImageResult { + url: img.image_url.clone(), + description: Some(img.title.clone()), + }) + .collect(), + ) + } else { + None + } + }); + + search_results.push(SearchResult { + title: result.title, + url: result.link, + snippet: result.snippet, + display_url: None, + source: Some("Serper".to_string()), + score: result.position.map(|p| 1.0 / (p as f64 + 1.0)), + html_snippet: None, + date_published: result.date, + images, + content_chunks: None, + }); + } + } + + if let Some(answer_box) = response.answer_box { + search_results.insert( + 0, + SearchResult { + title: answer_box.title, + url: answer_box.link.unwrap_or_default(), + snippet: answer_box.answer.or(answer_box.snippet).unwrap_or_default(), + display_url: None, + source: Some("Serper Answer Box".to_string()), + score: Some(1.0), + html_snippet: None, + date_published: None, + images: None, + content_chunks: None, + }, + ); + } + + if let Some(kg) = response.knowledge_graph { + let kg_images = kg.image_url.map(|url| { + vec![ImageResult { + url, + description: Some(kg.title.clone()), + }] + }); + + search_results.insert( + 0, + SearchResult { + title: kg.title, + url: kg.website.unwrap_or_default(), + snippet: kg.description.unwrap_or_default(), + display_url: None, + source: Some("Serper Knowledge Graph".to_string()), + score: Some(1.0), + html_snippet: None, + date_published: None, + images: kg_images, + content_chunks: None, + }, + ); + } + + let total_results = response + .search_information + .as_ref() + .and_then(|info| info.total_results.as_ref()) + .and_then(|total| total.parse::().ok()); + + let search_time_ms = response + .search_information + .as_ref() + .and_then(|info| info.time_taken.map(|t| t * 1000.0)); + + let metadata = Some(SearchMetadata { + query: response + .search_parameters + .as_ref() + .map(|sp| sp.q.clone()) + .unwrap_or_else(|| params.query.clone()), + total_results, + 
search_time_ms, + safe_search: None, + language: params.language.clone(), + region: params.region.clone(), + next_page_token: None, + rate_limits: None, + }); + + (search_results, metadata) +} diff --git a/web-search/serper/src/lib.rs b/web-search/serper/src/lib.rs new file mode 100644 index 000000000..40b0475ba --- /dev/null +++ b/web-search/serper/src/lib.rs @@ -0,0 +1,114 @@ +use crate::client::SerperSearchApi; +use crate::conversions::{convert_params_to_request, convert_response_to_results}; +use golem_web_search::config::with_config_key; + +use golem_web_search::durability::DurableWebSearch; +use golem_web_search::golem::web_search::types::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; +use golem_web_search::golem_web_search::web_search::web_search::{ + Guest, GuestSearchSession, SearchSession, +}; + +use golem_web_search::LOGGING_STATE; +use std::cell::RefCell; + +mod client; +mod conversions; + +struct SerperWebSearchComponent; + +impl SerperWebSearchComponent { + const API_KEY_ENV_VAR: &'static str = "SERPER_API_KEY"; +} + +pub struct SerperSearchSession { + client: SerperSearchApi, + params: SearchParams, + current_page: RefCell, + last_metadata: RefCell>, + has_more_results: RefCell, +} + +impl SerperSearchSession { + fn new(client: SerperSearchApi, params: SearchParams) -> Self { + Self { + client, + params, + current_page: RefCell::new(1), + last_metadata: RefCell::new(None), + has_more_results: RefCell::new(true), + } + } +} + +impl GuestSearchSession for SerperSearchSession { + fn next_page(&self) -> Result { + if !*self.has_more_results.borrow() { + return Err(SearchError::BackendError( + "No more results available".to_string(), + )); + } + + let current_page = *self.current_page.borrow(); + let new_page = current_page + 1; + *self.current_page.borrow_mut() = new_page; + + let request = convert_params_to_request(&self.params, Some(new_page)); + let response = self.client.search(request)?; + let (results, metadata) = 
convert_response_to_results(response, &self.params); + + *self.last_metadata.borrow_mut() = metadata.clone(); + + if results.is_empty() { + *self.has_more_results.borrow_mut() = false; + return Err(SearchError::BackendError("No more results".to_string())); + } + + if new_page >= 10 { + *self.has_more_results.borrow_mut() = false; + } + + results + .into_iter() + .next() + .ok_or_else(|| SearchError::BackendError("No results returned".to_string())) + } + + fn get_metadata(&self) -> Option { + self.last_metadata.borrow().clone() + } +} + +impl Guest for SerperWebSearchComponent { + type SearchSession = SerperSearchSession; + + fn start_search(params: SearchParams) -> Result { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + with_config_key(&[Self::API_KEY_ENV_VAR], Err, |keys| { + let api_key = keys.get(Self::API_KEY_ENV_VAR).unwrap().to_owned(); + let client = SerperSearchApi::new(api_key); + Ok(SearchSession::new(SerperSearchSession::new(client, params))) + }) + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec, Option), SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + with_config_key(&[Self::API_KEY_ENV_VAR], Err, |keys| { + let api_key = keys.get(Self::API_KEY_ENV_VAR).unwrap().to_owned(); + let client = SerperSearchApi::new(api_key); + let request = convert_params_to_request(¶ms, None); + let response = client.search(request)?; + let (results, metadata) = convert_response_to_results(response, ¶ms); + Ok((results, metadata)) + }) + } +} + +type DurableSerperWebSearchComponent = DurableWebSearch; + +golem_web_search::export_web_search!(DurableSerperWebSearchComponent with_types_in golem_web_search); diff --git a/web-search/serper/wit/deps/golem-web-search/golem-web-search.wit b/web-search/serper/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/web-search/serper/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package 
golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that 
returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/serper/wit/deps/wasi:io/error.wit b/web-search/serper/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/serper/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. 
+ @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/serper/wit/deps/wasi:io/poll.wit b/web-search/serper/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/serper/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. 
+ @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/serper/wit/deps/wasi:io/streams.wit b/web-search/serper/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/serper/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. 
When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. 
+ len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. 
+ /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. 
the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/serper/wit/deps/wasi:io/world.wit b/web-search/serper/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/serper/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/serper/wit/serper.wit b/web-search/serper/wit/serper.wit new file mode 100644 index 000000000..c9954959d --- /dev/null +++ b/web-search/serper/wit/serper.wit @@ -0,0 +1,5 @@ +package golem:web-search-serper@1.0.0; + +world web-search-library { + include golem:web-search/web-search-library@1.0.0; +} diff --git a/web-search/tavily/Cargo.toml b/web-search/tavily/Cargo.toml new file mode 100644 index 000000000..fad3297f9 --- /dev/null +++ b/web-search/tavily/Cargo.toml @@ -0,0 +1,43 @@ +[package] +name = "golem-websearch-tavily" +version = "0.0.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for working with Tavily Search API, with special support for Golem Cloud" + +[lib] +path = "src/lib.rs" +crate-type = ["cdylib"] + +[features] +default = ["durability"] +durability = ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../web-search", version = "0.0.0", default-features = false } + +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } + +[package.metadata.component] +package = "golem:websearch-tavily" + +[package.metadata.component.bindings] 
+generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:web-search" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io"} diff --git a/web-search/tavily/src/bindings.rs b/web-search/tavily/src/bindings.rs new file mode 100644 index 000000000..55de799c4 --- /dev/null +++ b/web-search/tavily/src/bindings.rs @@ -0,0 +1,318 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! +// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/web-search@1.0.0" = "golem_web_search::golem::web_search::web_search" +// * generate_unused_types +use golem_web_search::golem::web_search::web_search as __with_name0; +#[rustfmt::skip] +#[allow(dead_code, clippy::all)] +pub mod golem { + pub mod web_search { + #[allow(dead_code, async_fn_in_trait, unused_imports, clippy::all)] + pub mod types { + #[used] + #[doc(hidden)] + static __FORCE_SECTION_REF: fn() = super::super::super::__link_custom_section_describing_imports; + use super::super::super::_rt; + /// Optional image-related result data + #[derive(Clone)] + pub struct ImageResult { + pub url: _rt::String, + pub description: Option<_rt::String>, + } + impl ::core::fmt::Debug for ImageResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("ImageResult") + .field("url", &self.url) + .field("description", &self.description) + .finish() + } + } + /// Core structure for a single search result + #[derive(Clone)] + pub struct SearchResult { + pub title: _rt::String, + pub url: _rt::String, + pub snippet: _rt::String, + pub display_url: Option<_rt::String>, + pub source: Option<_rt::String>, + pub score: Option, + pub html_snippet: Option<_rt::String>, + pub date_published: Option<_rt::String>, + pub images: 
Option<_rt::Vec>, + pub content_chunks: Option<_rt::Vec<_rt::String>>, + } + impl ::core::fmt::Debug for SearchResult { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchResult") + .field("title", &self.title) + .field("url", &self.url) + .field("snippet", &self.snippet) + .field("display-url", &self.display_url) + .field("source", &self.source) + .field("score", &self.score) + .field("html-snippet", &self.html_snippet) + .field("date-published", &self.date_published) + .field("images", &self.images) + .field("content-chunks", &self.content_chunks) + .finish() + } + } + /// Safe search settings + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum SafeSearchLevel { + Off, + Medium, + High, + } + impl ::core::fmt::Debug for SafeSearchLevel { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SafeSearchLevel::Off => { + f.debug_tuple("SafeSearchLevel::Off").finish() + } + SafeSearchLevel::Medium => { + f.debug_tuple("SafeSearchLevel::Medium").finish() + } + SafeSearchLevel::High => { + f.debug_tuple("SafeSearchLevel::High").finish() + } + } + } + } + impl SafeSearchLevel { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> SafeSearchLevel { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => SafeSearchLevel::Off, + 1 => SafeSearchLevel::Medium, + 2 => SafeSearchLevel::High, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Rate limiting metadata + #[repr(C)] + #[derive(Clone, Copy)] + pub struct RateLimitInfo { + pub limit: u32, + pub remaining: u32, + pub reset_timestamp: u64, + } + impl ::core::fmt::Debug for RateLimitInfo { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("RateLimitInfo") + .field("limit", &self.limit) + .field("remaining", &self.remaining) + .field("reset-timestamp", &self.reset_timestamp) + .finish() + 
} + } + /// Optional metadata for a search session + #[derive(Clone)] + pub struct SearchMetadata { + pub query: _rt::String, + pub total_results: Option, + pub search_time_ms: Option, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: Option<_rt::String>, + pub next_page_token: Option<_rt::String>, + pub rate_limits: Option, + } + impl ::core::fmt::Debug for SearchMetadata { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchMetadata") + .field("query", &self.query) + .field("total-results", &self.total_results) + .field("search-time-ms", &self.search_time_ms) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("next-page-token", &self.next_page_token) + .field("rate-limits", &self.rate_limits) + .finish() + } + } + /// Supported time range filtering + #[repr(u8)] + #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] + pub enum TimeRange { + Day, + Week, + Month, + Year, + } + impl ::core::fmt::Debug for TimeRange { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + TimeRange::Day => f.debug_tuple("TimeRange::Day").finish(), + TimeRange::Week => f.debug_tuple("TimeRange::Week").finish(), + TimeRange::Month => f.debug_tuple("TimeRange::Month").finish(), + TimeRange::Year => f.debug_tuple("TimeRange::Year").finish(), + } + } + } + impl TimeRange { + #[doc(hidden)] + pub unsafe fn _lift(val: u8) -> TimeRange { + if !cfg!(debug_assertions) { + return ::core::mem::transmute(val); + } + match val { + 0 => TimeRange::Day, + 1 => TimeRange::Week, + 2 => TimeRange::Month, + 3 => TimeRange::Year, + _ => panic!("invalid enum discriminant"), + } + } + } + /// Query parameters accepted by the unified search API + #[derive(Clone)] + pub struct SearchParams { + pub query: _rt::String, + pub safe_search: Option, + pub language: Option<_rt::String>, + pub region: 
Option<_rt::String>, + pub max_results: Option, + pub time_range: Option, + pub include_domains: Option<_rt::Vec<_rt::String>>, + pub exclude_domains: Option<_rt::Vec<_rt::String>>, + pub include_images: Option, + pub include_html: Option, + pub advanced_answer: Option, + } + impl ::core::fmt::Debug for SearchParams { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + f.debug_struct("SearchParams") + .field("query", &self.query) + .field("safe-search", &self.safe_search) + .field("language", &self.language) + .field("region", &self.region) + .field("max-results", &self.max_results) + .field("time-range", &self.time_range) + .field("include-domains", &self.include_domains) + .field("exclude-domains", &self.exclude_domains) + .field("include-images", &self.include_images) + .field("include-html", &self.include_html) + .field("advanced-answer", &self.advanced_answer) + .finish() + } + } + /// Structured search error + #[derive(Clone)] + pub enum SearchError { + InvalidQuery, + RateLimited(u32), + UnsupportedFeature(_rt::String), + BackendError(_rt::String), + } + impl ::core::fmt::Debug for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + match self { + SearchError::InvalidQuery => { + f.debug_tuple("SearchError::InvalidQuery").finish() + } + SearchError::RateLimited(e) => { + f.debug_tuple("SearchError::RateLimited").field(e).finish() + } + SearchError::UnsupportedFeature(e) => { + f.debug_tuple("SearchError::UnsupportedFeature") + .field(e) + .finish() + } + SearchError::BackendError(e) => { + f.debug_tuple("SearchError::BackendError").field(e).finish() + } + } + } + } + impl ::core::fmt::Display for SearchError { + fn fmt( + &self, + f: &mut ::core::fmt::Formatter<'_>, + ) -> ::core::fmt::Result { + write!(f, "{:?}", self) + } + } + impl std::error::Error for SearchError {} + } + } +} +#[rustfmt::skip] +mod _rt { + #![allow(dead_code, clippy::all)] + pub use 
alloc_crate::string::String; + pub use alloc_crate::vec::Vec; + extern crate alloc as alloc_crate; +} +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-tavily@1.0.0:web-search-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1377] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xd8\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x08\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05month\x04year\x04\ +\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05querys\x0bsafe-se\ +arch\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-range\x16\x0finclu\ +de-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0cinclude-html\x17\x0f\ +advanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\x0dinvalid-query\0\ +\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dbackend-error\x01s\0\x04\ +\0\x0csearch-error\x03\0\x1a\x03\0\x1cgolem:web-search/types@1.0.0\x05\0\x02\x03\ +\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\0\0\x0fsearch-metadat\ +a\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\x04\0\x0dsearch-param\ +s\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\x02\x02\x03\x02\x01\x03\ 
+\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\0\x0csearch-error\x03\ +\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01j\x01\x03\x01\x07\x01@\x01\x04\ +self\x09\0\x0a\x04\0\x20[method]search-session.next-page\x01\x0b\x01k\x05\x01@\x01\ +\x04self\x09\0\x0c\x04\0#[method]search-session.get-metadata\x01\x0d\x01i\x08\x01\ +j\x01\x0e\x01\x07\x01@\x01\x06params\x01\0\x0f\x04\0\x0cstart-search\x01\x10\x01\ +p\x03\x01o\x02\x11\x0c\x01j\x01\x12\x01\x07\x01@\x01\x06params\x01\0\x13\x04\0\x0b\ +search-once\x01\x14\x04\0!golem:web-search/web-search@1.0.0\x05\x05\x04\00golem:\ +web-search-tavily/web-search-library@1.0.0\x04\0\x0b\x18\x01\0\x12web-search-lib\ +rary\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-component\x070.227.1\ +\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/web-search/tavily/src/client.rs b/web-search/tavily/src/client.rs new file mode 100644 index 000000000..d8c424945 --- /dev/null +++ b/web-search/tavily/src/client.rs @@ -0,0 +1,158 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::types::SearchError; +use log::trace; +use reqwest::{header, Client, Method, Response, StatusCode}; +use serde::{Deserialize, Serialize}; + +const BASE_URL: &str = "https://api.tavily.com/search"; + +pub struct TavilySearchApi { + api_key: String, + client: Client, +} + +impl TavilySearchApi { + pub fn new(api_key: String) -> Self { + let client = Client::builder() + .build() + .expect("Failed to initialize HTTP client"); + Self { api_key, client } + } + + pub fn search( + &self, + request: TavilySearchRequest, + ) -> Result { + trace!("Sending request to Tavily Search API: {request:?}"); + + let response: Response = self + .client + .request(Method::POST, BASE_URL) + .header(header::ACCEPT, "application/json") + .header(header::CONTENT_TYPE, "application/json") + 
.header(header::USER_AGENT, "golem-websearch/1.0") + .header(header::AUTHORIZATION, format!("Bearer {}", &self.api_key)) + .json(&request) + .send() + .map_err(|err| from_reqwest_error("Request failed", err))?; + + parse_response(response) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TavilySearchRequest { + pub query: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub search_depth: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub topic: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_results: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_answer: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_raw_content: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_images: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_domains: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub exclude_domains: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub time_range: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub country: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub days: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TavilySearchResponse { + pub query: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub answer: Option, + pub results: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub images: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub response_time: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TavilySearchResult { + pub title: String, + pub url: String, + pub content: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_content: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub published_date: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub score: Option, +} + +#[derive(Debug, 
Serialize, Deserialize, Clone)] +#[serde(untagged)] +pub enum TavilyImage { + Url(String), + Object { + url: String, + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TavilyErrorResponse { + #[serde(rename = "type")] + pub error_type: String, + pub error: TavilyError, + pub time: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TavilyError { + pub id: Option, + pub status: u16, + pub code: String, + pub detail: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub meta: Option, +} + +/// Translates a Tavily HTTP response into the crate's result type. +/// +/// * `200` - the body is read and deserialized into `TavilySearchResponse`; a read or +/// parse failure becomes `SearchError::BackendError` (the parse error includes the raw body). +/// * `400` - `SearchError::InvalidQuery`; the response body is only logged at trace level. +/// * `401` - `SearchError::BackendError("Invalid API key")`. +/// * `429` - `SearchError::RateLimited(60)`. +/// * `432` - `SearchError::BackendError("Plan limit exceeded")`. +/// * anything else - `SearchError::BackendError` carrying the status and raw body. +fn parse_response(response: Response) -> Result<TavilySearchResponse, SearchError> { + match response.status() { + StatusCode::OK => { + let body = response.text().map_err(|e| { + SearchError::BackendError(format!("Failed to read response body: {e}")) + })?; + serde_json::from_str::<TavilySearchResponse>(&body).map_err(|e| { + SearchError::BackendError(format!( + "Failed to parse response: {e} \nRaw body: {body}" + )) + }) + } + StatusCode::BAD_REQUEST => { + // `InvalidQuery` has no payload, so log the server's explanation instead of discarding it. + let body = response + .text() + .unwrap_or_else(|_| "".into()); + trace!("Tavily rejected the request (400 Bad Request): {body}"); + Err(SearchError::InvalidQuery) + } + StatusCode::UNAUTHORIZED => Err(SearchError::BackendError("Invalid API key".to_string())), + // NOTE(review): the 60 is a fixed value, not read from the response - confirm the intended unit. + StatusCode::TOO_MANY_REQUESTS => Err(SearchError::RateLimited(60)), + status if status.as_u16() == 432 => { + Err(SearchError::BackendError("Plan limit exceeded".to_string())) + } + _ => { + let status = response.status(); + let body = response + .text() + .unwrap_or_else(|_| "".into()); + Err(SearchError::BackendError(format!( + "Request failed: {status} \nRaw body: {body}" + ))) + } + } +} diff --git a/web-search/tavily/src/conversions.rs b/web-search/tavily/src/conversions.rs new file mode 100644 index 000000000..c385cb6ed --- /dev/null +++ b/web-search/tavily/src/conversions.rs @@ -0,0 +1,155 @@ +use crate::client::{TavilyImage, TavilySearchRequest, TavilySearchResponse}; +use golem_web_search::golem::web_search::types::{
+ ImageResult, SearchMetadata, SearchParams, SearchResult, TimeRange, +}; + +pub fn convert_params_to_request( + params: &SearchParams, + _offset: Option, +) -> TavilySearchRequest { + let max_results = params.max_results.unwrap_or(10).min(20); + + TavilySearchRequest { + query: params.query.clone(), + search_depth: Some( + if params.advanced_answer.unwrap_or(false) { + "advanced" + } else { + "basic" + } + .to_string(), + ), + topic: Some("news".to_string()), + max_results: Some(max_results), + include_answer: params.advanced_answer, + include_raw_content: Some(false), + include_images: Some(true), + include_domains: params.include_domains.clone(), + exclude_domains: params.exclude_domains.clone(), + time_range: params.time_range.as_ref().map(|tr| match tr { + TimeRange::Day => "day".to_string(), + TimeRange::Week => "week".to_string(), + TimeRange::Month => "month".to_string(), + TimeRange::Year => "year".to_string(), + }), + country: country_code_to_tavily(params.region.as_ref().unwrap_or(&"us".to_string())), + days: params.time_range.as_ref().map(|tr| match tr { + TimeRange::Day => 1, + TimeRange::Week => 7, + TimeRange::Month => 30, + TimeRange::Year => 365, + }), + } +} + +pub fn country_code_to_tavily(country_code: &str) -> Option { + match country_code.to_lowercase().as_str() { + "us" | "usa" | "united states" => Some("us".to_string()), + "uk" | "gb" | "united kingdom" => Some("gb".to_string()), + "ca" | "canada" => Some("ca".to_string()), + "au" | "australia" => Some("au".to_string()), + "de" | "germany" => Some("de".to_string()), + "fr" | "france" => Some("fr".to_string()), + "es" | "spain" => Some("es".to_string()), + "it" | "italy" => Some("it".to_string()), + "jp" | "japan" => Some("jp".to_string()), + "br" | "brazil" => Some("br".to_string()), + "in" | "india" => Some("in".to_string()), + "cn" | "china" => Some("cn".to_string()), + "ru" | "russia" => Some("ru".to_string()), + "mx" | "mexico" => Some("mx".to_string()), + "ar" | "argentina" => 
Some("ar".to_string()), + "cl" | "chile" => Some("cl".to_string()), + "co" | "colombia" => Some("co".to_string()), + "pe" | "peru" => Some("pe".to_string()), + "za" | "south africa" => Some("za".to_string()), + "ng" | "nigeria" => Some("ng".to_string()), + "eg" | "egypt" => Some("eg".to_string()), + "kr" | "south korea" => Some("kr".to_string()), + "th" | "thailand" => Some("th".to_string()), + "sg" | "singapore" => Some("sg".to_string()), + "my" | "malaysia" => Some("my".to_string()), + "id" | "indonesia" => Some("id".to_string()), + "ph" | "philippines" => Some("ph".to_string()), + "vn" | "vietnam" => Some("vn".to_string()), + "tw" | "taiwan" => Some("tw".to_string()), + "hk" | "hong kong" => Some("hk".to_string()), + "nl" | "netherlands" => Some("nl".to_string()), + "be" | "belgium" => Some("be".to_string()), + "ch" | "switzerland" => Some("ch".to_string()), + "at" | "austria" => Some("at".to_string()), + "se" | "sweden" => Some("se".to_string()), + "no" | "norway" => Some("no".to_string()), + "dk" | "denmark" => Some("dk".to_string()), + "fi" | "finland" => Some("fi".to_string()), + "pl" | "poland" => Some("pl".to_string()), + "cz" | "czech republic" => Some("cz".to_string()), + "hu" | "hungary" => Some("hu".to_string()), + "gr" | "greece" => Some("gr".to_string()), + "pt" | "portugal" => Some("pt".to_string()), + "tr" | "turkey" => Some("tr".to_string()), + "il" | "israel" => Some("il".to_string()), + "ae" | "uae" | "united arab emirates" => Some("ae".to_string()), + "sa" | "saudi arabia" => Some("sa".to_string()), + "nz" | "new zealand" => Some("nz".to_string()), + _ => Some(country_code.to_lowercase()), + } +} + +pub fn convert_response_to_results( + response: TavilySearchResponse, + params: &SearchParams, +) -> (Vec, Option) { + let search_results: Vec = response + .results + .into_iter() + .map(|result| { + let images = response.images.as_ref().and_then(|imgs| { + if !imgs.is_empty() { + Some( + imgs.iter() + .map(|img| match img { + TavilyImage::Url(url) 
=> ImageResult { + url: url.clone(), + description: None, + }, + TavilyImage::Object { url, description } => ImageResult { + url: url.clone(), + description: description.clone(), + }, + }) + .collect(), + ) + } else { + None + } + }); + + SearchResult { + title: result.title, + url: result.url, + snippet: result.content, + display_url: None, + source: Some("Tavily".to_string()), + score: result.score, + html_snippet: None, + date_published: result.published_date, + images, + content_chunks: None, + } + }) + .collect(); + + let metadata = Some(SearchMetadata { + query: response.query, + total_results: None, + search_time_ms: response.response_time, + safe_search: None, + language: params.language.clone(), + region: params.region.clone(), + next_page_token: None, + rate_limits: None, + }); + + (search_results, metadata) +} diff --git a/web-search/tavily/src/lib.rs b/web-search/tavily/src/lib.rs new file mode 100644 index 000000000..e70d0d1c4 --- /dev/null +++ b/web-search/tavily/src/lib.rs @@ -0,0 +1,114 @@ +use crate::client::TavilySearchApi; +use crate::conversions::{convert_params_to_request, convert_response_to_results}; +use golem_web_search::config::with_config_key; + +use golem_web_search::durability::DurableWebSearch; +use golem_web_search::golem::web_search::types::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; +use golem_web_search::golem_web_search::web_search::web_search::{ + Guest, GuestSearchSession, SearchSession, +}; + +use golem_web_search::LOGGING_STATE; +use std::cell::RefCell; + +mod client; +mod conversions; + +struct TavilyWebSearchComponent; + +impl TavilyWebSearchComponent { + const API_KEY_ENV_VAR: &'static str = "TAVILY_API_KEY"; +} + +pub struct TavilySearchSession { + client: TavilySearchApi, + params: SearchParams, + current_offset: RefCell, + last_metadata: RefCell>, + has_more_results: RefCell, +} + +impl TavilySearchSession { + fn new(client: TavilySearchApi, params: SearchParams) -> Self { + Self { + client, + 
params, + current_offset: RefCell::new(0), + last_metadata: RefCell::new(None), + has_more_results: RefCell::new(true), + } + } +} + +impl GuestSearchSession for TavilySearchSession { + fn next_page(&self) -> Result { + if !*self.has_more_results.borrow() { + return Err(SearchError::BackendError( + "No more results available".to_string(), + )); + } + + let page_size = self.params.max_results.unwrap_or(10); + let current_offset = *self.current_offset.borrow(); + let new_offset = current_offset + page_size; + *self.current_offset.borrow_mut() = new_offset; + + let request = convert_params_to_request(&self.params, Some(new_offset)); + let response = self.client.search(request)?; + let (results, metadata) = convert_response_to_results(response, &self.params); + + *self.last_metadata.borrow_mut() = metadata.clone(); + + if results.is_empty() { + *self.has_more_results.borrow_mut() = false; + return Err(SearchError::BackendError("No more results".to_string())); + } + + // Tavily doesn't support pagination in the same way, so we'll mark as no more results + *self.has_more_results.borrow_mut() = false; + + results + .into_iter() + .next() + .ok_or_else(|| SearchError::BackendError("No results returned".to_string())) + } + + fn get_metadata(&self) -> Option { + self.last_metadata.borrow().clone() + } +} + +impl Guest for TavilyWebSearchComponent { + type SearchSession = TavilySearchSession; + + fn start_search(params: SearchParams) -> Result { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + with_config_key(&[Self::API_KEY_ENV_VAR], Err, |keys| { + let api_key = keys.get(Self::API_KEY_ENV_VAR).unwrap().to_owned(); + let client = TavilySearchApi::new(api_key); + Ok(SearchSession::new(TavilySearchSession::new(client, params))) + }) + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec, Option), SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + + with_config_key(&[Self::API_KEY_ENV_VAR], Err, |keys| { + let api_key = 
keys.get(Self::API_KEY_ENV_VAR).unwrap().to_owned(); + let client = TavilySearchApi::new(api_key); + let request = convert_params_to_request(¶ms, None); + let response = client.search(request)?; + let (results, metadata) = convert_response_to_results(response, ¶ms); + Ok((results, metadata)) + }) + } +} + +type DurableTavilyWebSearchComponent = DurableWebSearch; + +golem_web_search::export_web_search!(DurableTavilyWebSearchComponent with_types_in golem_web_search); diff --git a/web-search/tavily/wit/deps/golem-web-search/golem-web-search.wit b/web-search/tavily/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/web-search/tavily/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + 
advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/tavily/wit/deps/wasi:io/error.wit b/web-search/tavily/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/tavily/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. 
This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/tavily/wit/deps/wasi:io/poll.wit b/web-search/tavily/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/tavily/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. 
+ /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/tavily/wit/deps/wasi:io/streams.wit b/web-search/tavily/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/tavily/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. 
I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. 
Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. 
+ /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. 
+ /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. 
calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. + @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/tavily/wit/deps/wasi:io/world.wit b/web-search/tavily/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/tavily/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/tavily/wit/tavily.wit b/web-search/tavily/wit/tavily.wit new file mode 100644 index 000000000..3cc5e54e2 --- /dev/null +++ b/web-search/tavily/wit/tavily.wit @@ -0,0 +1,5 @@ +package golem:web-search-tavily@1.0.0; + +world web-search-library { + include golem:web-search/web-search-library@1.0.0; +} diff --git a/web-search/test/wit/deps/golem-web-search/golem-web-search.wit b/web-search/test/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/web-search/test/wit/deps/golem-web-search/golem-web-search.wit 
@@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + 
/// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/test/wit/deps/io/error.wit b/web-search/test/wit/deps/io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/test/wit/deps/io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. 
+ @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/test/wit/deps/io/poll.wit b/web-search/test/wit/deps/io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/test/wit/deps/io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. 
+ @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/test/wit/deps/io/streams.wit b/web-search/test/wit/deps/io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/test/wit/deps/io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. 
When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. 
+ len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. 
+ /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. 
the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/test/wit/deps/io/world.wit b/web-search/test/wit/deps/io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/test/wit/deps/io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/web-search/Cargo.toml b/web-search/web-search/Cargo.toml new file mode 100644 index 000000000..a69a752c5 --- /dev/null +++ b/web-search/web-search/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "golem-web-search" +version = "0.0.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly components for working with AI models and providers APIs, with special support for Golem Cloud" + +[lib] +path = "src/lib.rs" +crate-type = ["rlib"] + +[dependencies] +golem-rust = { workspace = true } +log = { workspace = true } +mime = "0.3.17" +nom = { version = "7.1", default-features = false } +reqwest = { workspace = true } +thiserror = "2.0.12" +wasi-logger = "0.1.2" +wit-bindgen = { version = "0.40.0" } + +[features] +default = ["durability"] +durability = ["golem-rust/durability"] diff --git a/web-search/web-search/src/config.rs b/web-search/web-search/src/config.rs new file mode 100644 index 000000000..ed5e13489 --- /dev/null +++ b/web-search/web-search/src/config.rs @@ -0,0 +1,27 @@ +use crate::exports::golem::web_search::web_search::SearchError; +use std::{collections::HashMap, ffi::OsStr}; + +/// Gets an expected configuration value from the environment, and fails if it is not found +/// using the `fail` function. Otherwise, it runs `succeed` with the configuration value. 
+pub fn with_config_key( + keys: &[impl AsRef], + fail: impl FnOnce(SearchError) -> R, + succeed: impl FnOnce(HashMap) -> R, +) -> R { + let mut hashmap = HashMap::new(); + for key in keys { + match std::env::var(key.as_ref()) { + Ok(value) => { + hashmap.insert(key.as_ref().to_string_lossy().to_string(), value); + } + Err(_) => { + let error = SearchError::BackendError(format!( + "Missing config key: {}", + key.as_ref().to_string_lossy() + )); + return fail(error); + } + } + } + succeed(hashmap) +} diff --git a/web-search/web-search/src/durability.rs b/web-search/web-search/src/durability.rs new file mode 100644 index 000000000..402a64d7f --- /dev/null +++ b/web-search/web-search/src/durability.rs @@ -0,0 +1,112 @@ +use std::marker::PhantomData; + +/// Wraps a web search implementation with custom durability +pub struct DurableWebSearch { + phantom: PhantomData, +} + +/// When the durability feature flag is off, wrapping with `DurableWebSearch` is just a passthrough +#[cfg(not(feature = "durability"))] +mod passthrough_impl { + use crate::durability::DurableWebSearch; + use crate::exports::golem::web_search::web_search::{ + Guest, SearchError, SearchMetadata, SearchParams, SearchResult, SearchSession, + }; + + impl Guest for DurableWebSearch { + type SearchSession = Impl::SearchSession; + + fn start_search(params: SearchParams) -> Result { + Impl::start_search(params) + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec, Option), SearchError> { + Impl::search_once(params) + } + } +} + +/// When the durability feature flag is on, wrapping with `DurableWebSearch` adds custom durability +/// on top of the provider-specific web search implementation using Golem's special host functions and +/// the `golem-rust` helper library. +/// +/// There will be custom durability entries saved in the oplog, with the full web search request and configuration +/// stored as input, and the full response stored as output. 
/// Durable implementation of the exported `Guest` interface, compiled only when the
/// `durability` feature is enabled.
///
/// To make the recorded entries observable by oplog consumers, each relevant data type has to be
/// converted to/from `ValueAndType`, which is implemented using the type classes and builder in
/// the `golem-rust` library.
#[cfg(feature = "durability")]
mod durable_impl {
    use crate::durability::DurableWebSearch;
    use crate::exports::golem::web_search::web_search::{
        Guest, SearchError, SearchMetadata, SearchParams, SearchResult, SearchSession,
    };
    use golem_rust::bindings::golem::durability::durability::DurableFunctionType;
    use golem_rust::durability::Durability;
    use golem_rust::{with_persistence_level, FromValueAndType, IntoValue, PersistenceLevel};
    use std::fmt::{Display, Formatter};

    impl<Impl: Guest> Guest for DurableWebSearch<Impl> {
        type SearchSession = Impl::SearchSession;

        /// Starts a search session through the wrapped provider.
        ///
        /// Live: runs `Impl::start_search` under `PersistenceLevel::PersistNothing` and, on
        /// success, records the request parameters together with a `NoOutput` marker.
        /// Replay: consumes the recorded marker and calls `Impl::start_search` again with the
        /// same parameters; the session itself is never stored.
        fn start_search(params: SearchParams) -> Result<SearchSession, SearchError> {
            let durability = Durability::<NoOutput, UnusedError>::new(
                "golem_web_search",
                "start_search",
                DurableFunctionType::WriteRemote,
            );
            if durability.is_live() {
                // NOTE(review): a failed start returns here without any `persist_*` call.
                // Confirm `Durability` tolerates that, and that re-running the request on a
                // later replay is the intended behaviour for errors.
                let session = with_persistence_level(PersistenceLevel::PersistNothing, || {
                    Impl::start_search(params.clone())
                })?;
                let _ = durability.persist_infallible(params, NoOutput);
                Ok(session)
            } else {
                let _: NoOutput = durability.replay_infallible();
                // NOTE(review): unlike the live branch, this call is not wrapped in
                // `with_persistence_level`. Presumably provider `start_search` makes no host
                // calls that must line up with the oplog; verify against the providers.
                Impl::start_search(params)
            }
        }

        /// Performs a one-shot search through the wrapped provider.
        ///
        /// Live: runs `Impl::search_once` under `PersistenceLevel::PersistNothing` and, on
        /// success, records the parameters as input and the result tuple as output.
        /// Replay: returns the recorded result tuple without calling the provider.
        fn search_once(
            params: SearchParams,
        ) -> Result<(Vec<SearchResult>, Option<SearchMetadata>), SearchError> {
            let durability =
                Durability::<(Vec<SearchResult>, Option<SearchMetadata>), UnusedError>::new(
                    "golem_web_search",
                    "search_once",
                    DurableFunctionType::WriteRemote,
                );
            if durability.is_live() {
                // NOTE(review): errors are returned without being persisted, so only
                // successful searches are recorded. Confirm this is intended.
                let success = with_persistence_level(PersistenceLevel::PersistNothing, || {
                    Impl::search_once(params.clone())
                })?;
                Ok(durability.persist_infallible(params, success))
            } else {
                let result: (Vec<SearchResult>, Option<SearchMetadata>) =
                    durability.replay_infallible();
                Ok(result)
            }
        }
    }

    /// Marker recorded as the output of `start_search`, which has no storable result.
    #[derive(Debug, Clone, IntoValue, FromValueAndType)]
    struct NoOutput;

    /// Fills the error type parameter of `Durability`; only the infallible persist/replay
    /// calls are used in this module, so no value of this type is ever constructed here.
    #[derive(Debug, IntoValue, FromValueAndType)]
    struct UnusedError;

    impl Display for UnusedError {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            write!(f, "UnusedError")
        }
    }
}
+ pub fn init(&mut self) { + if !self.logging_initialized { + let _ = wasi_logger::Logger::install(); + let max_level: log::LevelFilter = log::LevelFilter::from_str( + &std::env::var("GOLEM_WEB_SEARCH_LOG").unwrap_or_default(), + ) + .unwrap_or(log::LevelFilter::Info); + log::set_max_level(max_level); + self.logging_initialized = true; + } + } +} + +thread_local! { + /// This holds the state of our application. + pub static LOGGING_STATE: RefCell = const { RefCell::new(LoggingState { + logging_initialized: false, + }) }; +} diff --git a/web-search/web-search/wit/deps/golem-web-search/golem-web-search.wit b/web-search/web-search/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/web-search/web-search/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: 
option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +} diff --git a/web-search/web-search/wit/deps/wasi:io/error.wit b/web-search/web-search/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/web-search/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. 
This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/web-search/wit/deps/wasi:io/poll.wit b/web-search/web-search/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/web-search/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. 
+ /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/web-search/wit/deps/wasi:io/streams.wit b/web-search/web-search/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/web-search/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. 
+ /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. 
+ @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. 
+ /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. 
+ /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. 
calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. + @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/web-search/wit/deps/wasi:io/world.wit b/web-search/web-search/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/web-search/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/web-search/wit/web-search.wit b/web-search/web-search/wit/web-search.wit new file mode 100644 index 000000000..8222e707f --- /dev/null +++ b/web-search/web-search/wit/web-search.wit @@ -0,0 +1,5 @@ +package golem:web-search-library@1.0.0; + +world web-search-library { + export golem:web-search/web-search@1.0.0; +} diff --git a/web-search/wit/deps.lock b/web-search/wit/deps.lock new file mode 100644 index 000000000..adc795b3a --- /dev/null +++ b/web-search/wit/deps.lock @@ -0,0 +1,4 @@ +["wasi:io"] +url = 
"https://github.com/WebAssembly/wasi-io/archive/v0.2.3.tar.gz" +sha256 = "1cccbfe4122686ea57a25cd368e8cdfc408cbcad089f47fb6685b6f92e96f050" +sha512 = "7a95f964c13da52611141acd89bc8876226497f128e99dd176a4270c5b5efbd8cc847b5fbd1a91258d028c646db99e0424d72590cf1caf20f9f3a3343fad5017" diff --git a/web-search/wit/deps.toml b/web-search/wit/deps.toml new file mode 100644 index 000000000..15e1ae691 --- /dev/null +++ b/web-search/wit/deps.toml @@ -0,0 +1 @@ +"wasi:io" = "https://github.com/WebAssembly/wasi-io/archive/v0.2.3.tar.gz" diff --git a/web-search/wit/deps/wasi:io/error.wit b/web-search/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/web-search/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. 
Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/web-search/wit/deps/wasi:io/poll.wit b/web-search/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/web-search/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. 
+ @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/web-search/wit/deps/wasi:io/streams.wit b/web-search/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/web-search/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. 
When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. 
+ len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. 
+ /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. 
the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/web-search/wit/deps/wasi:io/world.wit b/web-search/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/web-search/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/web-search/wit/golem-web-search.wit b/web-search/wit/golem-web-search.wit new file mode 100644 index 000000000..4279b7733 --- /dev/null +++ b/web-search/wit/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + 
+ /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + + + +world web-search-library { + export web-search; +}