diff --git a/.gitignore b/.gitignore index a930abcf6..650d5003e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ openapi target tmp -components \ No newline at end of file +components +.DS_Store \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 334bd657c..d247176e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,33 +17,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -65,12 +38,6 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" -[[package]] -name = "async-iterator" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "742b2f12ff517f144b6181d24f3f2481b503e05650ee79feec1f090048089f88" - [[package]] name = "auditable-serde" version = "0.8.0" @@ -91,9 +58,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.8.0" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"455e9fb7743c6f6267eb2830ccc08686fbb3d13c9a689369562fd4d4ef9ea462" +checksum = "ebd9b83179adf8998576317ce47785948bcff399ec5b15f4dfbdedd44ddf5b92" dependencies = [ "aws-credential-types", "aws-runtime", @@ -116,9 +83,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.3" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "687bc16bc431a8533fe0097c7f0182874767f920989d7260950172ae8e3c4465" +checksum = "b68c2194a190e1efc999612792e25b1ab3abfefe4306494efaaabc25933c0cbe" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -128,9 +95,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.8" +version = "1.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6c68419d8ba16d9a7463671593c54f81ba58cab466e9b759418da606dcc2e2" +checksum = "b2090e664216c78e766b6bac10fe74d2f451c02441d43484cd76ac9a295075f7" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -153,9 +120,9 @@ dependencies = [ [[package]] name = "aws-sdk-bedrockruntime" -version = "1.93.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c46900e6ef102ae75cd2ff16d5a73085228a8b0fcccb987d85f792673da00ef" +checksum = "a2cabf0de26d45a7529721f8900657d005b9989b1b26121b0200450126b2a685" dependencies = [ "aws-credential-types", "aws-runtime", @@ -178,9 +145,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.75.0" +version = "1.78.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3258fa707f2f585ee3049d9550954b959002abd59176975150a01d5cf38ae3f" +checksum = "37f7766d2344f56d10d12f3c32993da36d78217f32594fe4fb8e57a538c1cdea" dependencies = [ "aws-credential-types", "aws-runtime", @@ -235,9 +202,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.9" +version = "0.60.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"338a3642c399c0a5d157648426110e199ca7fd1c689cc395676b81aa563700c4" +checksum = "604c7aec361252b8f1c871a7641d5e0ba3a7f5a586e51b66bc9510a5519594d9" dependencies = [ "aws-smithy-types", "bytes", @@ -246,9 +213,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.1" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99335bec6cdc50a346fda1437f9fefe33abf8c99060739a546a16457f2862ca9" +checksum = "43c82ba4cab184ea61f6edaafc1072aad3c2a17dcf4c0fce19ac5694b90d8b5f" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", @@ -295,9 +262,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14302f06d1d5b7d333fd819943075b13d27c7700b414f574c3c35859bfb55d5e" +checksum = "c3aaec682eb189e43c8a19c3dab2fe54590ad5f2cc2d26ab27608a20f2acf81c" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -530,16 +497,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "cordyceps" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688d7fbb8092b8de775ef2536f36c8c31f2bc4006ece2e8d8ad2d17d00ce0a2a" -dependencies = [ - "loom", - "tracing", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -583,12 +540,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "diatomic-waker" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c" - [[package]] name = "digest" version = "0.10.7" @@ -638,12 +589,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - [[package]] name = "flate2" version = "1.1.1" @@ -690,19 +635,6 @@ dependencies = [ "futures-util", ] -[[package]] -name = "futures-buffered" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe940397c8b744b9c2c974791c2c08bca2c3242ce0290393249e98f215a00472" -dependencies = [ - "cordyceps", - "diatomic-waker", - "futures-core", - "pin-project-lite", - "spin", -] - [[package]] name = "futures-channel" version = "0.3.31" @@ -713,21 +645,6 @@ dependencies = [ "futures-sink", ] -[[package]] -name = "futures-concurrency" -version = "7.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eb68017df91f2e477ed4bea586c59eaecaa47ed885a770d0444e21e62572cd2" -dependencies = [ - "fixedbitset", - "futures-buffered", - "futures-core", - "futures-lite", - "pin-project", - "slab", - "smallvec", -] - [[package]] name = "futures-core" version = "0.3.31" @@ -751,19 +668,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-lite" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5edaec856126859abb19ed65f39e90fea3a9574b9707f13539acf4abf7eb532" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - [[package]] name = "futures-macro" version = "0.3.31" @@ -805,20 +709,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generator" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827" -dependencies = [ - "cc", - "cfg-if", - "libc", - "log", - "rustversion", - "windows", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -875,7 +765,7 @@ dependencies = [ "log", "mime", "nom", - "reqwest 
0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "thiserror", "wasi-logger", "wit-bindgen 0.40.0", @@ -889,7 +779,7 @@ dependencies = [ "golem-llm", "golem-rust", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -911,7 +801,6 @@ dependencies = [ "golem-rust", "infer", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-july-2025)", "serde", "serde_json", "wasi 0.14.2+wasi-0.2.4", @@ -927,7 +816,7 @@ dependencies = [ "golem-llm", "golem-rust", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -942,7 +831,7 @@ dependencies = [ "golem-rust", "log", "mime_guess", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "url", @@ -957,7 +846,7 @@ dependencies = [ "golem-llm", "golem-rust", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -971,7 +860,7 @@ dependencies = [ "golem-llm", "golem-rust", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -1011,7 +900,7 @@ dependencies = [ "log", "mime", "nom", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "thiserror", "wasi-logger", "wit-bindgen 0.40.0", @@ -1026,7 +915,7 @@ dependencies = [ "golem-rust", "golem-search", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -1041,7 +930,7 @@ dependencies = [ "golem-rust", "golem-search", "log", - "reqwest 0.12.15 
(git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -1056,7 +945,7 @@ dependencies = [ "golem-rust", "golem-search", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -1071,7 +960,7 @@ dependencies = [ "golem-rust", "golem-search", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "wit-bindgen-rt 0.40.0", @@ -1086,7 +975,7 @@ dependencies = [ "golem-rust", "golem-search", "log", - "reqwest 0.12.15 (git+https://github.com/golemcloud/reqwest?branch=update-may-2025)", + "reqwest", "serde", "serde_json", "urlencoding", @@ -1107,13 +996,83 @@ dependencies = [ ] [[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +name = "golem-web-search" +version = "0.0.0" +dependencies = [ + "golem-rust", + "log", + "reqwest", + "serde", + "serde_json", + "wasi-logger", + "wit-bindgen 0.40.0", +] + +[[package]] +name = "golem-web-search-brave" +version = "0.1.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "url", + "urlencoding", + "wasm-bindgen", + "wit-bindgen-rt 0.40.0", +] + +[[package]] +name = "golem-web-search-google" +version = "0.1.0" dependencies = [ - "ahash", - "allocator-api2", + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "url", + "urlencoding", + "wasm-bindgen", + "wit-bindgen-rt 0.40.0", +] + +[[package]] +name = "golem-web-search-serper" +version = "0.1.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "url", + "wasm-bindgen", + 
"wit-bindgen-rt 0.40.0", +] + +[[package]] +name = "golem-web-search-tavily" +version = "0.1.0" +dependencies = [ + "base64 0.22.1", + "golem-rust", + "golem-web-search", + "log", + "reqwest", + "serde", + "serde_json", + "url", + "urlencoding", + "wasm-bindgen", + "wit-bindgen-rt 0.40.0", ] [[package]] @@ -1389,7 +1348,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.3", + "hashbrown", "serde", ] @@ -1399,6 +1358,17 @@ version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "itoa" version = "1.0.15" @@ -1415,12 +1385,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - [[package]] name = "leb128" version = "0.2.5" @@ -1451,28 +1415,6 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" -[[package]] -name = "loom" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" -dependencies = [ - "cfg-if", - "generator", - "scoped-tls", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - [[package]] name = "memchr" version = "2.7.4" @@ -1511,23 +1453,24 @@ dependencies = [ ] [[package]] -name = "nom" -version = "7.1.3" +name = "mio" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ - "memchr", - "minimal-lexical", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys", ] [[package]] -name = "nu-ansi-term" -version = "0.46.0" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ - "overload", - "winapi", + "memchr", + "minimal-lexical", ] [[package]] @@ -1575,44 +1518,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - [[package]] name = "percent-encoding" version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "pin-project" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1674,78 +1585,12 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" -[[package]] -name = "regex" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", -] - -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.5", -] - [[package]] name = "regex-lite" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" - -[[package]] -name = "reqwest" -version = "0.12.15" -source = "git+https://github.com/golemcloud/reqwest?branch=update-july-2025#9e0c586a3f2fc2f9fe32ddf46c2a49152777693b" -dependencies = [ - "async-iterator", - "base64 0.22.1", - "bytes", - "encoding_rs", - "futures", - "futures-concurrency", - "http 1.3.1", - "mime", - "percent-encoding", - "serde", - "serde_json", - "serde_urlencoded", - "url", - "wasi 0.12.1+wasi-0.2.0", - "wasi-async-runtime", -] - [[package]] name = "reqwest" version = "0.12.15" @@ -1795,12 +1640,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "semver" version = "1.0.26" @@ -1871,15 +1710,6 @@ dependencies = [ "digest", ] -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - [[package]] name = "shlex" version = "1.3.0" @@ -1910,12 +1740,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -1979,15 +1803,6 @@ dependencies = [ "syn", ] -[[package]] -name = "thread_local" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" -dependencies = [ - "cfg-if", -] - [[package]] name = "time" version = "0.3.41" @@ -2030,12 +1845,16 
@@ dependencies = [ [[package]] name = "tokio" -version = "1.45.1" +version = "1.46.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" dependencies = [ "backtrace", + "io-uring", + "libc", + "mio", "pin-project-lite", + "slab", ] [[package]] @@ -2079,36 +1898,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", ] [[package]] @@ -2183,12 +1972,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "valuable" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" - [[package]] name = "version_check" version = "0.9.5" @@ -2210,6 +1993,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasi" version = "0.12.1+wasi-0.2.0" @@ -2228,17 +2017,6 @@ dependencies = [ "wit-bindgen-rt 0.39.0", ] 
-[[package]] -name = "wasi-async-runtime" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9df0b7f89869b578aa56d8e4749776be8949ba9abda52fc8d5c15f02e901e022" -dependencies = [ - "hashbrown 0.14.5", - "slab", - "wasi 0.12.1+wasi-0.2.0", -] - [[package]] name = "wasi-logger" version = "0.1.2" @@ -2379,55 +2157,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f51cad774fb3c9461ab9bccc9c62dfb7388397b5deda31bf40e8108ccd678b2" dependencies = [ "bitflags", - "hashbrown 0.15.3", + "hashbrown", "indexmap", "semver", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.61.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5ee8f3d025738cb02bad7868bbb5f8a6327501e870bf51f1b455b0a2454a419" -dependencies = [ - "windows-collections", - "windows-core", - "windows-future", - "windows-link", - "windows-numerics", -] - -[[package]] -name = "windows-collections" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" -dependencies = [ - "windows-core", -] - [[package]] name = "windows-core" version = "0.61.2" @@ -2441,17 +2175,6 @@ dependencies = [ "windows-strings", ] 
-[[package]] -name = "windows-future" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" -dependencies = [ - "windows-core", - "windows-link", - "windows-threading", -] - [[package]] name = "windows-implement" version = "0.60.0" @@ -2476,19 +2199,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" - -[[package]] -name = "windows-numerics" -version = "0.2.0" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" -dependencies = [ - "windows-core", - "windows-link", -] +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-result" @@ -2508,6 +2221,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -2524,15 +2246,6 @@ dependencies = [ "windows_x86_64_msvc", ] -[[package]] -name = "windows-threading" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" -dependencies = [ - "windows-link", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -2865,26 +2578,6 @@ dependencies = [ "synstructure", ] -[[package]] -name = "zerocopy" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" -dependencies = [ - 
"zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "zerofrom" version = "0.1.6" diff --git a/Cargo.toml b/Cargo.toml index a0669fe7e..4206c8d44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,11 @@ members = [ "llm/ollama", "llm/openai", "llm/openrouter", + "websearch/websearch", + "websearch/brave", + "websearch/google", + "websearch/serper", + "websearch/tavily", "search/search", "search/elasticsearch", "search/algolia", diff --git a/Makefile.toml b/Makefile.toml index 360fda01b..23c43600e 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -13,7 +13,8 @@ args = ["test"] [tasks.build] script_runner = "@duckscript" script = ''' -domains = array llm search + +domains = array llm websearch search # if there is no domain passed run for every domain if is_empty ${1} @@ -28,7 +29,8 @@ end [tasks.release-build] script_runner = "@duckscript" script = ''' -domains = array llm search + +domains = array llm websearch search # if there is no domain passed run for every domain if is_empty ${1} @@ -44,7 +46,9 @@ end script_runner = "@duckscript" script = ''' #!/bin/bash -domains = array llm search + +domains = array llm websearch search + # if there is no domain passed run for every domain if is_empty ${1} @@ -60,7 +64,9 @@ end script_runner = "@duckscript" script = ''' #!/bin/bash -domains = array llm search + +domains = array llm websearch search + # if there is no domain passed run for every domain if is_empty ${1} @@ -75,7 +81,9 @@ end [tasks.wit] script_runner = "@duckscript" script = ''' -domains = array llm search + +domains = array llm websearch search + # if there is no domain passed run for every domain if is_empty ${1} @@ -91,7 +99,9 @@ end description = "Builds all test components with golem-cli" script_runner = 
"@duckscript" script = ''' -domains = array llm search + +domains = array llm websearch search + # if there is no domain passed run for every domain if is_empty ${1} @@ -144,7 +154,10 @@ is_portable = eq ${portable} "yes" environment = get_env "ENVIRONMENT" is_dev = eq ${environment} "dev" -targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama llm_bedrock search_algolia search_elasticsearch search_meilisearch search_opensearch search_typesense + +targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama web_search_brave web_search_google web_search_serper web_search_tavily search_algolia search_elasticsearch search_meilisearch search_opensearch search_typesense + + for target in ${targets} if ${is_portable} if ${is_dev} @@ -171,7 +184,11 @@ is_portable = eq ${portable} "yes" environment = get_env "ENVIRONMENT" is_dev = eq ${environment} "dev" -targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama llm_bedrock search_algolia search_elasticsearch search_meilisearch search_opensearch search_typesense + + +targets = array llm_openai llm_anthropic llm_grok llm_openrouter llm_ollama web_search_brave web_search_google web_search_serper web_search_tavily search_algolia search_elasticsearch search_meilisearch search_opensearch search_typesense + + for target in ${targets} if ${is_portable} if ${is_dev} @@ -255,4 +272,4 @@ foreach ($file in $cargoFiles) Foreach-Object { $_ -replace "0.0.0", $Env:VERSION } | Set-Content $file.PSPath } -''' +''' \ No newline at end of file diff --git a/test/.gitignore b/test/.gitignore index 175f32628..943c59c69 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -1,2 +1,3 @@ golem-temp target +.DS_Store \ No newline at end of file diff --git a/test/Cargo.lock b/test/Cargo.lock index 22a6de76b..25d73d97a 100644 --- a/test/Cargo.lock +++ b/test/Cargo.lock @@ -886,6 +886,18 @@ dependencies = [ "wit-bindgen-rt 0.40.0", ] +[[package]] +name = "test_websearch" +version = "0.0.0"
+dependencies = [ + "golem-rust", + "log", + "reqwest", + "serde", + "serde_json", + "wit-bindgen-rt 0.40.0", +] + [[package]] name = "thiserror" version = "2.0.12" diff --git a/test/components-rust/test-websearch/Cargo.toml b/test/components-rust/test-websearch/Cargo.toml new file mode 100644 index 000000000..b52634b89 --- /dev/null +++ b/test/components-rust/test-websearch/Cargo.toml @@ -0,0 +1,46 @@ +[package] +name = "test_websearch" +version = "0.0.0" +edition = "2021" + +[lib] +path = "src/lib.rs" +crate-type = ["cdylib"] +required-features = [] + +[features] +default = ["google"] +brave = [] +tavily = [] +google = [] +serper = [] + +[dependencies] +golem-rust = { workspace = true } +log = { version = "0.4.27" } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } + +[package.metadata.component.target] +path = "wit-generated" + + +[package.metadata.component.target.dependencies] +"wasi:io" = { path = "wit-generated/deps/io" } +"wasi:clocks" = { path = "wit-generated/deps/clocks" } +"golem:rpc" = { path = "wit-generated/deps/golem-rpc" } +"golem:web-search" = { path = "wit-generated/deps/golem-websearch" } +"test:helper-client" = { path = "wit-generated/deps/test_helper-client" } +"test:websearch-exports" = { path = "wit-generated/deps/test_websearch-exports" } + +[package.metadata.component.bindings] + +[package.metadata.component.bindings.with] +"wasi:io/poll@0.2.0" = "golem_rust::wasm_rpc::wasi::io::poll" +"wasi:clocks/wall-clock@0.2.0" = "golem_rust::wasm_rpc::wasi::clocks::wall_clock" +"golem:rpc/types@0.2.0" = "golem_rust::wasm_rpc::golem_rpc_0_2_x::types" +# See https://github.com/bytecodealliance/cargo-component/blob/main/src/metadata.rs#L62 +# derives = ["serde::Serialize", "serde::Deserialize"] +# generate_unused_types = true diff --git a/test/components-rust/test-websearch/golem.yaml b/test/components-rust/test-websearch/golem.yaml new file mode 100644 index 
000000000..da296b9fc --- /dev/null +++ b/test/components-rust/test-websearch/golem.yaml @@ -0,0 +1,178 @@ +components: + test:websearch: + defaultProfile: google-debug + profiles: + # DEBUG PROFILES + google-debug: + build: + - command: cargo component build --no-default-features --features google + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_web_search_google.wasm ../../target/wasm32-wasip1/debug/test_websearch.wasm -o ../../target/wasm32-wasip1/debug/test_google_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - ../../../target/wasm32-wasip1/debug/golem_web_search_google.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_google_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_google_plugged.wasm + linkedWasm: ../../golem-temp/components/test_google_debug.wasm + clean: + - src/bindings.rs + + serper-debug: + build: + - command: cargo component build --no-default-features --features serper + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_web_search_serper.wasm ../../target/wasm32-wasip1/debug/test_websearch.wasm -o ../../target/wasm32-wasip1/debug/test_serper_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - ../../../target/wasm32-wasip1/debug/golem_web_search_serper.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_serper_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_serper_plugged.wasm + linkedWasm: ../../golem-temp/components/test_serper_debug.wasm + clean: + - src/bindings.rs + + tavily-debug: + build: + - command: cargo component build --no-default-features --features tavily + sources: + - src 
+ - wit-generated + targets: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_web_search_tavily.wasm ../../target/wasm32-wasip1/debug/test_websearch.wasm -o ../../target/wasm32-wasip1/debug/test_tavily_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - ../../../target/wasm32-wasip1/debug/golem_web_search_tavily.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_tavily_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_tavily_plugged.wasm + linkedWasm: ../../golem-temp/components/test_tavily_debug.wasm + clean: + - src/bindings.rs + + brave-debug: + build: + - command: cargo component build --no-default-features --features brave + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/debug/golem_web_search_brave.wasm ../../target/wasm32-wasip1/debug/test_websearch.wasm -o ../../target/wasm32-wasip1/debug/test_brave_plugged.wasm + sources: + - ../../target/wasm32-wasip1/debug/test_websearch.wasm + - ../../../target/wasm32-wasip1/debug/golem_web_search_brave.wasm + targets: + - ../../target/wasm32-wasip1/debug/test_brave_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/debug/test_brave_plugged.wasm + linkedWasm: ../../golem-temp/components/test_brave_debug.wasm + clean: + - src/bindings.rs + + # RELEASE PROFILES + google-release: + build: + - command: cargo component build --release --no-default-features --features google + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_web_search_google.wasm ../../target/wasm32-wasip1/release/test_websearch.wasm -o 
../../target/wasm32-wasip1/release/test_google_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - ../../../target/wasm32-wasip1/release/golem_web_search_google.wasm + targets: + - ../../target/wasm32-wasip1/release/test_google_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/release/test_google_plugged.wasm + linkedWasm: ../../golem-temp/components/test_google_release.wasm + clean: + - src/bindings.rs + + serper-release: + build: + - command: cargo component build --release --no-default-features --features serper + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_web_search_serper.wasm ../../target/wasm32-wasip1/release/test_websearch.wasm -o ../../target/wasm32-wasip1/release/test_serper_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - ../../../target/wasm32-wasip1/release/golem_web_search_serper.wasm + targets: + - ../../target/wasm32-wasip1/release/test_serper_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/release/test_serper_plugged.wasm + linkedWasm: ../../golem-temp/components/test_serper_release.wasm + clean: + - src/bindings.rs + + tavily-release: + build: + - command: cargo component build --release --no-default-features --features tavily + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_web_search_tavily.wasm ../../target/wasm32-wasip1/release/test_websearch.wasm -o ../../target/wasm32-wasip1/release/test_tavily_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - ../../../target/wasm32-wasip1/release/golem_web_search_tavily.wasm + targets: + - 
../../target/wasm32-wasip1/release/test_tavily_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/release/test_tavily_plugged.wasm + linkedWasm: ../../golem-temp/components/test_tavily_release.wasm + clean: + - src/bindings.rs + + brave-release: + build: + - command: cargo component build --release --no-default-features --features brave + sources: + - src + - wit-generated + targets: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - command: wac plug --plug ../../../target/wasm32-wasip1/release/golem_web_search_brave.wasm ../../target/wasm32-wasip1/release/test_websearch.wasm -o ../../target/wasm32-wasip1/release/test_brave_plugged.wasm + sources: + - ../../target/wasm32-wasip1/release/test_websearch.wasm + - ../../../target/wasm32-wasip1/release/golem_web_search_brave.wasm + targets: + - ../../target/wasm32-wasip1/release/test_brave_plugged.wasm + sourceWit: wit + generatedWit: wit-generated + componentWasm: ../../target/wasm32-wasip1/release/test_brave_plugged.wasm + linkedWasm: ../../golem-temp/components/test_brave_release.wasm + clean: + - src/bindings.rs + +dependencies: + test:websearch: + - target: test:helper + type: wasm-rpc \ No newline at end of file diff --git a/test/components-rust/test-websearch/src/lib.rs b/test/components-rust/test-websearch/src/lib.rs new file mode 100644 index 000000000..28a5d083c --- /dev/null +++ b/test/components-rust/test-websearch/src/lib.rs @@ -0,0 +1,615 @@ +#[allow(static_mut_refs)] +mod bindings; + +use golem_rust::atomically; +use crate::bindings::test::helper_client::test_helper_client::TestHelperApi; +use crate::bindings::exports::test::websearch_exports::test_websearch_api::*; +use crate::bindings::golem::web_search::web_search; +use crate::bindings::golem::web_search::types::{ + SearchParams, + SafeSearchLevel, + TimeRange, + SearchError, +}; + +struct Component; + +#[cfg(feature = "google")] +const PROVIDER: &'static str = "google"; +#[cfg(feature = 
"brave")] +const PROVIDER: &'static str = "brave"; +#[cfg(feature = "tavily")] +const PROVIDER: &'static str = "tavily"; +#[cfg(feature = "serper")] +const PROVIDER: &'static str = "serper"; + +impl Guest for Component { + /// test1 demonstrates a simple, one-shot web search query + fn test1() -> String { + let params = SearchParams { + query: "weather forecast Slovenia".to_string(), + safe_search: Some(SafeSearchLevel::Medium), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(5), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Sending search request using {} provider...", PROVIDER); + let response = web_search::search_once(&params); + println!("Response: {:?}", response); + + match response { + Ok((results, metadata)) => { + let mut output = String::new(); + + output.push_str(&format!("Found {} results:\n", results.len())); + + for (i, result) in results.iter().enumerate() { + output.push_str( + &format!( + "{}. 
{}\n URL: {}\n Snippet: {}\n", + i + 1, + result.title, + result.url, + result.snippet + ) + ); + + if let Some(score) = result.score { + output.push_str(&format!(" Score: {:.2}\n", score)); + } + + if let Some(date) = &result.date_published { + output.push_str(&format!(" Published: {}\n", date)); + } + + output.push_str("\n"); + } + + if let Some(meta) = metadata { + output.push_str("\nDetailed Search Metadata:\n"); + output.push_str(&format!(" Query: {}\n", meta.query)); + if let Some(total) = meta.total_results { + output.push_str(&format!(" Total Results: {}\n", total)); + } + if let Some(time) = meta.search_time_ms { + output.push_str(&format!(" Search Time: {:.2}ms\n", time)); + } + if let Some(lang) = &meta.language { + output.push_str(&format!(" Language: {}\n", lang)); + } + if let Some(reg) = &meta.region { + output.push_str(&format!(" Region: {}\n", reg)); + } + if let Some(safe) = meta.safe_search { + output.push_str(&format!(" Safe Search Level: {:?}\n", safe)); + } + if let Some(rate_limit) = &meta.rate_limits { + output.push_str( + &format!( + " Rate Limit: {}/{} requests remaining (reset: {})\n", + rate_limit.remaining, + rate_limit.limit, + rate_limit.reset_timestamp + ) + ); + } + } + + output + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + error_msg + } + } + } + + /// test2 simulates a crash during a streaming web search response, but only first time. + /// It demonstrates paginated search using search sessions. + /// after the automatic recovery it will continue and finish the request successfully. 
+ fn test2() -> String { + let params = SearchParams { + query: "Rust programming language tutorials".to_string(), + safe_search: Some(SafeSearchLevel::Off), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(3), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Starting search session using {} provider...", PROVIDER); + + let session = match web_search::start_search(¶ms) { + Ok(session) => session, + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + return error_msg; + } + }; + + let mut output = String::new(); + output.push_str("Search session started successfully!\n\n"); + let name = std::env::var("GOLEM_WORKER_NAME").unwrap(); + let mut round = 0; + + // Get first page + println!("Getting first page..."); + match session.next_page() { + Ok(results) => { + output.push_str(&format!("First page - {} results:\n", results.len())); + for (i, result) in results.iter().enumerate() { + output.push_str(&format!("{}. 
{}\n {}\n", i + 1, result.title, result.url)); + } + output.push_str("\n"); + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + output.push_str(&format!("{}\n\n", error_msg)); + } + } + round += 1; + + // Add a delay before the next request to avoid rate limiting + std::thread::sleep(std::time::Duration::from_secs(2)); + + // Crash simulation before getting second page + if round == 1 { + atomically(|| { + let client = TestHelperApi::new(&name); + let answer = client.blocking_inc_and_get(); + if answer == 1 { + panic!("Simulating crash") + } + }); + } + + // Get second page + println!("Getting second page..."); + match session.next_page() { + Ok(results) => { + if results.is_empty() { + output.push_str("No more results available (end of pagination)\n"); + } else { + output.push_str(&format!("Second page - {} results:\n", results.len())); + for (i, result) in results.iter().enumerate() { + output.push_str( + &format!("{}. {}\n {}\n", i + 1, result.title, result.url) + ); + } + } + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + output.push_str(&format!("{}\n", error_msg)); + } + } + + // Get metadata + if let Some(metadata) = session.get_metadata() { + output.push_str(&format!("\nDetailed Session Metadata:\n")); + output.push_str(&format!(" Query: {}\n", metadata.query)); + if let Some(total) = metadata.total_results { + output.push_str(&format!(" Total Results: {}\n", total)); + } + if let Some(time) = metadata.search_time_ms { + output.push_str(&format!(" Search Time: {:.2}ms\n", time)); + } + if let Some(lang) = &metadata.language { + output.push_str(&format!(" Language: {}\n", lang)); + } + if let Some(reg) = &metadata.region { + output.push_str(&format!(" Region: {}\n", reg)); + } + if let Some(safe) = metadata.safe_search { + output.push_str(&format!(" Safe Search Level: {:?}\n", safe)); + } + if let Some(rate_limits) = &metadata.rate_limits { + output.push_str( + 
&format!( + " Rate Limits: {}/{} remaining (reset: {})\n", + rate_limits.remaining, + rate_limits.limit, + rate_limits.reset_timestamp + ) + ); + } + // Assert and output current_page + let expected_page = 1; // After two next_page() calls, should be on page 1 (0-based) + assert_eq!( + metadata.current_page, + expected_page, + "Expected current_page to be {} after two next_page() calls, got {}", + expected_page, + metadata.current_page + ); + output.push_str(&format!(" Current Page: {}\n", metadata.current_page)); + } + + output + } + + /// test3 demonstrates time-filtered search for recent news + fn test3() -> String { + let params = SearchParams { + query: "artificial intelligence breakthrough".to_string(), + safe_search: Some(SafeSearchLevel::Medium), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(5), + time_range: Some(TimeRange::Week), + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Searching for recent AI news using {} provider...", PROVIDER); + let response = web_search::search_once(¶ms); + + match response { + Ok((results, metadata)) => { + let mut output = String::new(); + output.push_str("Recent AI news (past week):\n\n"); + + for (i, result) in results.iter().enumerate() { + output.push_str(&format!("{}. 
{}\n", i + 1, result.title)); + output.push_str(&format!(" URL: {}\n", result.url)); + output.push_str(&format!(" Snippet: {}\n", result.snippet)); + + if let Some(date) = &result.date_published { + output.push_str(&format!(" Published: {}\n", date)); + } + + if let Some(source) = &result.source { + output.push_str(&format!(" Source: {}\n", source)); + } + + output.push_str("\n"); + } + + if let Some(meta) = metadata { + output.push_str( + &format!("Search parameters: time_range={:?}\n", TimeRange::Week) + ); + if let Some(total) = meta.total_results { + output.push_str(&format!("Total results available: {}\n", total)); + } + } + + output + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + error_msg + } + } + } + + /// test4 demonstrates domain filtering (include specific domains) + fn test4() -> String { + let domains = vec![ + "nature.com".to_string(), + "science.org".to_string(), + "sciencedirect.com".to_string() + ]; + + let params = SearchParams { + query: "climate change research".to_string(), + safe_search: Some(SafeSearchLevel::Medium), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(6), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Searching academic sources for climate research using {} provider...", PROVIDER); + let response = web_search::search_once(¶ms); + + match response { + Ok((results, metadata)) => { + let mut output = String::new(); + output.push_str("Climate research from academic sources:\n\n"); + + if results.is_empty() { + output.push_str("No results found from the specified academic domains.\n"); + } + + for (i, result) in results.iter().enumerate() { + output.push_str(&format!("{}. 
{}\n", i + 1, result.title)); + output.push_str(&format!(" URL: {}\n", result.url)); + output.push_str(&format!(" Snippet: {}\n", result.snippet)); + + if let Some(display_url) = &result.display_url { + output.push_str(&format!(" Display URL: {}\n", display_url)); + } + + output.push_str("\n"); + } + + output.push_str(&format!("Target academic domains: {}\n", domains.join(", "))); + + if let Some(meta) = metadata { + output.push_str("\nSearch metadata:\n"); + output.push_str(&format!(" Query: {}\n", meta.query)); + if let Some(total) = meta.total_results { + output.push_str(&format!(" Total results: {}\n", total)); + } + if let Some(time) = meta.search_time_ms { + output.push_str(&format!(" Search time: {:.2}ms\n", time)); + } + } + + output + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + error_msg + } + } + } + + /// test5 demonstrates domain exclusion and image inclusion + fn test5() -> String { + let excluded_domains = vec![ + "amazon.com".to_string(), + "ebay.com".to_string(), + "aliexpress.com".to_string() + ]; + + let params = SearchParams { + query: "mountain hiking gear reviews".to_string(), + safe_search: Some(SafeSearchLevel::Off), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(4), + time_range: None, + include_domains: None, + exclude_domains: Some(excluded_domains.clone()), + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Searching hiking gear reviews (excluding e-commerce) using {} provider...", PROVIDER); + let response = web_search::search_once(¶ms); + + match response { + Ok((results, metadata)) => { + let mut output = String::new(); + output.push_str("Hiking gear reviews (non-commercial sources):\n\n"); + + for (i, result) in results.iter().enumerate() { + output.push_str(&format!("{}. 
{}\n", i + 1, result.title)); + output.push_str(&format!(" URL: {}\n", result.url)); + output.push_str(&format!(" Snippet: {}\n", result.snippet)); + + if let Some(images) = &result.images { + if !images.is_empty() { + output.push_str(&format!(" Images found: {}\n", images.len())); + for (j, image) in images.iter().enumerate().take(2) { + output.push_str(&format!(" Image {}: {}\n", j + 1, image.url)); + if let Some(desc) = &image.description { + output.push_str(&format!(" Description: {}\n", desc)); + } + } + } + } + + if let Some(html) = &result.html_snippet { + output.push_str( + &format!(" HTML content available: {} chars\n", html.len()) + ); + } + + output.push_str("\n"); + } + + output.push_str(&format!("Excluded domains: {}\n", excluded_domains.join(", "))); + + if let Some(meta) = metadata { + output.push_str("\nSearch metadata:\n"); + output.push_str(&format!(" Query: {}\n", meta.query)); + if let Some(total) = meta.total_results { + output.push_str(&format!(" Total results: {}\n", total)); + } + if let Some(time) = meta.search_time_ms { + output.push_str(&format!(" Search time: {:.2}ms\n", time)); + } + } + + output + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + error_msg + } + } + } + + /// test6 demonstrates multilingual search with specific region + fn test6() -> String { + let params = SearchParams { + query: "slovenian recipes".to_string(), + safe_search: Some(SafeSearchLevel::Medium), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(5), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Searching Slovenian recipes in Slovenian language using {} provider...", PROVIDER); + let response = web_search::search_once(¶ms); + + match response { + Ok((results, metadata)) => { + let mut output = String::new(); + output.push_str("Slovenian traditional recipes (in 
Slovenian):\n\n"); + + if results.is_empty() { + output.push_str("No results found. This might be because:\n"); + output.push_str("- The provider doesn't support Slovenian language searches\n"); + output.push_str("- Limited content available in Slovenian\n"); + output.push_str("- Regional restrictions\n\n"); + } + + for (i, result) in results.iter().enumerate() { + output.push_str(&format!("{}. {}\n", i + 1, result.title)); + output.push_str(&format!(" URL: {}\n", result.url)); + output.push_str(&format!(" Snippet: {}\n", result.snippet)); + + if let Some(images) = &result.images { + if !images.is_empty() { + output.push_str(&format!(" Recipe images: {}\n", images.len())); + } + } + + output.push_str("\n"); + } + + if let Some(meta) = metadata { + output.push_str( + &format!( + "Search performed in: language={}, region={}\n", + meta.language.as_deref().unwrap_or("unknown"), + meta.region.as_deref().unwrap_or("unknown") + ) + ); + } + + output + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + error_msg + } + } + } + + /// test7 demonstrates advanced search with high safe search and content chunks + fn test7() -> String { + let trusted_domains = vec![ + "commonsensemedia.org".to_string(), + "safekids.org".to_string(), + "connectsafely.org".to_string() + ]; + + let params = SearchParams { + query: "child safety internet guidelines parents".to_string(), + safe_search: Some(SafeSearchLevel::High), + language: Some("en".to_string()), + region: Some("US".to_string()), + max_results: Some(4), + time_range: None, + include_domains: None, + exclude_domains: None, + include_images: None, + include_html: None, + advanced_answer: None, + }; + + println!("Searching child safety resources with high safe search using {} provider...", PROVIDER); + let response = web_search::search_once(¶ms); + + match response { + Ok((results, metadata)) => { + let mut output = String::new(); + output.push_str("Child Internet Safety Resources (High 
Safe Search):\n\n"); + + for (i, result) in results.iter().enumerate() { + output.push_str(&format!("{}. {}\n", i + 1, result.title)); + output.push_str(&format!(" URL: {}\n", result.url)); + output.push_str(&format!(" Snippet: {}\n", result.snippet)); + + if let Some(chunks) = &result.content_chunks { + output.push_str(&format!(" Content chunks: {}\n", chunks.len())); + for (j, chunk) in chunks.iter().enumerate().take(2) { + let preview = if chunk.len() > 100 { + format!("{}...", &chunk[..100]) + } else { + chunk.clone() + }; + output.push_str(&format!(" Chunk {}: {}\n", j + 1, preview)); + } + } + + if let Some(score) = result.score { + output.push_str(&format!(" Relevance score: {:.2}\n", score)); + } + + output.push_str("\n"); + } + + if let Some(meta) = metadata { + output.push_str(&format!("Safe search level: {:?}\n", meta.safe_search)); + output.push_str(&format!("Time range: past year\n")); + output.push_str( + &format!("Target trusted domains: {}\n", trusted_domains.join(", ")) + ); + + if let Some(rate_limit) = &meta.rate_limits { + output.push_str( + &format!( + "Rate limit: {}/{} requests remaining\n", + rate_limit.remaining, + rate_limit.limit + ) + ); + } + } + + output + } + Err(error) => { + let error_msg = format_search_error(error); + println!("{}", error_msg); + error_msg + } + } + } +} + +fn format_search_error(error: SearchError) -> String { + match error { + SearchError::InvalidQuery => "ERROR: Invalid query provided".to_string(), + SearchError::RateLimited(retry_after) => { + format!("ERROR: Rate limited. 
Retry after {} seconds", retry_after) + } + SearchError::UnsupportedFeature(feature) => { + format!("ERROR: Unsupported feature: {}", feature) + } + SearchError::BackendError(message) => { format!("ERROR: Backend error: {}", message) } + } +} + +bindings::export!(Component with_types_in bindings); diff --git a/test/components-rust/test-websearch/wit/test-websearch.wit b/test/components-rust/test-websearch/wit/test-websearch.wit new file mode 100644 index 000000000..586023186 --- /dev/null +++ b/test/components-rust/test-websearch/wit/test-websearch.wit @@ -0,0 +1,19 @@ +package test:websearch; + +// See https://component-model.bytecodealliance.org/design/wit.html for more details about the WIT syntax + +interface test-websearch-api { + test1: func() -> string; + test2: func() -> string; + test3: func() -> string; + test4: func() -> string; + test5: func() -> string; + test6: func() -> string; + test7: func() -> string; +} + +world test-websearch { + import golem:web-search/web-search@1.0.0; + import golem:web-search/types@1.0.0; + export test-websearch-api; +} \ No newline at end of file diff --git a/test/wit/deps/golem-websearch/golem-web-search.wit b/test/wit/deps/golem-websearch/golem-web-search.wit new file mode 100644 index 000000000..550ee50f0 --- /dev/null +++ b/test/wit/deps/golem-websearch/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option<string>, + source: option<string>, + score: option<f64>, + html-snippet: option<string>, + date-published: option<string>, + images: option<list<image-result>>, + content-chunks: option<list<string>>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option<string>, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: 
option<u64>, + search-time-ms: option<f64>, + safe-search: option<safe-search-level>, + language: option<string>, + region: 
option<string>, + next-page-token: option<string>, + rate-limits: option<rate-limit-info>, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option<safe-search-level>, + language: option<string>, + region: option<string>, + max-results: option<u32>, + time-range: option<time-range>, + include-domains: option<list<string>>, + exclude-domains: option<list<string>>, + include-images: option<bool>, + include-html: option<bool>, + advanced-answer: option<bool>, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result<list<search-result>, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option<search-metadata>; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result<search-session, search-error>; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result<tuple<list<search-result>, option<search-metadata>>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file diff --git a/websearch/Makefile.toml b/websearch/Makefile.toml new file mode 100644 index 000000000..a6db2b985 --- /dev/null +++ 
b/websearch/Makefile.toml @@ -0,0 +1,177 @@ +[config] +default_to_workspace = false +skip_core_tasks = true + +[tasks.build] +run_task = { name = [ + "build-brave", + "build-google", + "build-serper", + "build-tavily", +] } +
+[tasks.build-portable] +run_task = { name = [ + "build-brave-portable", + "build-google-portable", + "build-serper-portable", + "build-tavily-portable", +] } + +[tasks.release-build] +run_task = { name = [ + "release-build-brave", + "release-build-google", + "release-build-serper", + "release-build-tavily", +] } + +[tasks.release-build-portable] +run_task = { name = [ + "release-build-brave-portable", + "release-build-google-portable", + "release-build-serper-portable", + "release-build-tavily-portable", +] } + +[tasks.build-tavily] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-tavily"] + + +[tasks.build-tavily-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-tavily", "--no-default-features"] + + +[tasks.build-brave] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-brave"] + +[tasks.build-brave-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-brave", "--no-default-features"] + +[tasks.build-google] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-google"] + +[tasks.build-google-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-google", "--no-default-features"] + +[tasks.build-serper] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-serper"] + +[tasks.build-serper-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = 
["build", "-p", "golem-web-search-serper", "--no-default-features"] + +[tasks.release-build-tavily] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-tavily", "--release"] + +[tasks.release-build-tavily-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-tavily", "--release", "--no-default-features"] + +[tasks.release-build-brave] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-brave", "--release"] + +[tasks.release-build-brave-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-brave", "--release", "--no-default-features"] + +[tasks.release-build-google] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-google", "--release"] + +[tasks.release-build-google-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-google", "--release", "--no-default-features"] + +[tasks.release-build-serper] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = ["build", "-p", "golem-web-search-serper", "--release"] + +[tasks.release-build-serper-portable] +install_crate = { crate_name = "cargo-component", version = "0.20.0" } +command = "cargo-component" +args = [ + "build", + "-p", + "golem-web-search-serper", + "--release", + "--no-default-features", +] + +[tasks.wit-update] +install_crate = { crate_name = "wit-deps-cli" } +command = "wit-deps" +args = ["update"] + +[tasks.wit] +dependencies = ["wit-update"] + +script_runner = "@duckscript" +script = """ 
+modules = array websearch brave google serper tavily + +for module in ${modules} + rm -r ${module}/wit/deps + mkdir ${module}/wit/deps/golem-websearch + cp wit/golem-websearch.wit ${module}/wit/deps/golem-websearch/golem-websearch.wit + cp wit/deps/wasi:io ${module}/wit/deps + + echo "Copied WIT for module websearch::${module}" +end + +# Copy WIT files for integration tests +rm -r ../test/wit +mkdir ../test/wit/deps/golem-websearch +mkdir ../test/wit/deps/io +cp wit/golem-websearch.wit ../test/wit/deps/golem-websearch/golem-websearch.wit +cp wit/deps/wasi:io/error.wit ../test/wit/deps/io/error.wit +cp wit/deps/wasi:io/poll.wit ../test/wit/deps/io/poll.wit +cp wit/deps/wasi:io/streams.wit ../test/wit/deps/io/streams.wit +cp wit/deps/wasi:io/world.wit ../test/wit/deps/io/world.wit + +echo "Copied WIT for module test" +""" + +[tasks.build-test-components] +dependencies = ["build"] +install_crate = "cargo-binstall" +description = "Builds websearch test components with golem-cli" +script = ''' +cargo-binstall golem-cli@1.2.2-dev.11 --locked --no-confirm +cargo-binstall wac-cli --locked --no-confirm +cd ../test + +golem-cli --version +golem-cli app clean +golem-cli app build -b brave-debug +golem-cli app clean +golem-cli app build -b google-debug +golem-cli app clean +golem-cli app build -b serper-debug +golem-cli app clean +golem-cli app build -b tavily-debug +''' \ No newline at end of file diff --git a/websearch/brave/Cargo.toml b/websearch/brave/Cargo.toml new file mode 100644 index 000000000..3dc3f6352 --- /dev/null +++ b/websearch/brave/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "golem-web-search-brave" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for querying Brave Search APIs via the golem:web-search interface" + +[lib] +crate-type = ["cdylib"] +path = "src/lib.rs" + +[features] +default = ["durability"] +durability 
= ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../websearch", version = "0.0.0", default-features = false } +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } +url = "2.5" +urlencoding = "2.1" + +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasm-bindgen = "0.2" + +[profile.release] +opt-level = "s" +lto = true +codegen-units = 1 +panic = "abort" + +[package.metadata.component] +package = "golem:web-search-brave" + +[package.metadata.component.bindings] +generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +"golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:websearch" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io" } diff --git a/websearch/brave/src/bindings.rs b/websearch/brave/src/bindings.rs new file mode 100644 index 000000000..cd2764825 --- /dev/null +++ b/websearch/brave/src/bindings.rs @@ -0,0 +1,49 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! 
+// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" +// * with "golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +// * generate_unused_types +use golem_websearch::golem::websearch::types as __with_name0; +use golem_websearch::golem::websearch::websearch as __with_name1; +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-brave@1.0.0:websearch-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1388] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xe4\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x09\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x0ccurrent-pagey\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05\ +month\x04year\x04\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05\ +querys\x0bsafe-search\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-r\ +ange\x16\x0finclude-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0c\ +include-html\x17\x0fadvanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\ +\x0dinvalid-query\0\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dba\ +ckend-error\x01s\0\x04\0\x0csearch-error\x03\0\x1a\x04\0\x1cgolem:web-search/typ\ 
+es@1.0.0\x05\0\x02\x03\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\ +\0\0\x0fsearch-metadata\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\ +\x04\0\x0dsearch-params\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\ +\x02\x02\x03\x02\x01\x03\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\ +\0\x0csearch-error\x03\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01p\x03\x01\ +j\x01\x0a\x01\x07\x01@\x01\x04self\x09\0\x0b\x04\0\x20[method]search-session.nex\ +t-page\x01\x0c\x01k\x05\x01@\x01\x04self\x09\0\x0d\x04\0#[method]search-session.\ +get-metadata\x01\x0e\x01i\x08\x01j\x01\x0f\x01\x07\x01@\x01\x06params\x01\0\x10\x04\ +\0\x0cstart-search\x01\x11\x01o\x02\x0a\x0d\x01j\x01\x12\x01\x07\x01@\x01\x06par\ +ams\x01\0\x13\x04\0\x0bsearch-once\x01\x14\x04\0!golem:web-search/web-search@1.0\ +.0\x05\x05\x04\0.golem:web-search-brave/websearch-library@1.0.0\x04\0\x0b\x17\x01\ +\0\x11websearch-library\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-c\ +omponent\x070.227.1\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/websearch/brave/src/client.rs b/websearch/brave/src/client.rs new file mode 100644 index 000000000..bb4fb5e41 --- /dev/null +++ b/websearch/brave/src/client.rs @@ -0,0 +1,127 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::web_search::SearchError; +use log::trace; +use reqwest::Method; +use reqwest::{Client, Response}; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use std::fmt::Debug; + +const BASE_URL: &str = "https://api.search.brave.com/res/v1/web/search"; + +/// The Brave Search API client for web search. 
+pub struct BraveSearchApi {
+    client: Client,
+    pub api_key: String,
+}
+
+impl BraveSearchApi {
+    /// Builds a client that sends `api_key` as the subscription token.
+    pub fn new(api_key: String) -> Self {
+        let client = Client::builder()
+            .user_agent("Golem-Web-Search/1.0")
+            .build()
+            .expect("Failed to initialize HTTP client");
+        Self { client, api_key }
+    }
+
+    /// Sends one GET to `BASE_URL` and decodes the JSON body of the response.
+    pub fn search(&self, request: SearchRequest) -> Result<SearchResponse, SearchError> {
+        trace!("Sending request to Brave Search API: {request:?}");
+        let response = self
+            .client
+            .request(Method::GET, BASE_URL)
+            .header("X-Subscription-Token", &self.api_key)
+            .header("Accept", "application/json")
+            .query(&[
+                ("q", &request.query),
+                ("count", &request.count.unwrap_or(10).to_string()),
+                ("offset", &request.offset.unwrap_or(0).to_string()),
+            ])
+            .send()
+            .map_err(|err| from_reqwest_error("Request failed", err))?;
+        parse_response(response)
+    }
+
+    /// Returns the subscription token this client was built with.
+    pub fn api_key(&self) -> &String {
+        &self.api_key
+    }
+}
+
+/// Parameters of one search call; `search` sends 10 / 0 when `count` / `offset` is unset.
+#[derive(Debug, Clone)]
+pub struct SearchRequest {
+    pub query: String,
+    pub count: Option<u32>,
+    pub offset: Option<u32>,
+}
+
+/// The part of the Brave response body that this crate deserializes.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchResponse {
+    pub query: QueryInfo,
+    pub web: Option<WebResults>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct QueryInfo {
+    pub original: String,
+    pub more_results_available: bool,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WebResults {
+    pub results: Vec<WebResult>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WebResult {
+    pub title: String,
+    pub url: String,
+    pub description: String,
+    pub date: Option<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ErrorResponse {
+    pub message: String,
+    #[serde(rename = "type")]
+    pub error_type: String,
+}
+
+/// Decodes a success body as `T`; otherwise maps the HTTP status to a `SearchError`.
+fn parse_response<T: DeserializeOwned + Debug>(response: Response) -> Result<T, SearchError> {
+    let status = response.status();
+    if status.is_success() {
+        let body = response
+            .json::<T>()
+            .map_err(|err| from_reqwest_error("Failed to decode response body", err))?;
+        trace!("Received response from Brave Search API: {body:?}");
+        Ok(body)
+    } else {
+        // Try to parse error response
+        match response.json::<ErrorResponse>() {
+            Ok(error_body) => {
+                trace!("Received {status} response from Brave Search API: {error_body:?}");
+                let search_error = match status.as_u16() {
+                    400 => SearchError::InvalidQuery,
+                    401 => SearchError::BackendError("Invalid API key".to_string()),
+                    403 => SearchError::BackendError("API key quota exceeded".to_string()),
+                    429 => SearchError::RateLimited(60), // Default to 60 seconds
+                    _ => SearchError::BackendError(format!(
+                        "Request failed with {}: {}",
+                        status, error_body.message
+                    )),
+                };
+                Err(search_error)
+            }
+            Err(_) => {
+                // Fallback for non-JSON error responses
+                Err(SearchError::BackendError(format!(
+                    "Request failed with status {status}"
+                )))
+            }
+        }
+    }
+}
diff --git a/websearch/brave/src/conversions.rs b/websearch/brave/src/conversions.rs
new file mode 100644
index 000000000..7bf0e9e4f
--- /dev/null
+++ b/websearch/brave/src/conversions.rs
@@ -0,0 +1,161 @@
+use crate::client::{SearchRequest, SearchResponse, WebResult};
+use golem_web_search::golem::web_search::web_search::{
+    SearchError, SearchMetadata, SearchParams, SearchResult,
+};
+
+/// Page size requested when the caller leaves `max_results` unset.
+const DEFAULT_COUNT: u32 = 10;
+
+/// Largest page index Brave accepts for its `offset` query parameter.
+const MAX_PAGE_OFFSET: u32 = 9;
+
+/// Builds the Brave request for `params` at page index `offset`.
+///
+/// Each excluded domain is appended to the query as a `-site:` operator.
+/// Returns `SearchError::InvalidQuery` when the query is empty or blank.
+pub fn params_to_request(params: SearchParams, offset: u32) -> Result<SearchRequest, SearchError> {
+    if params.query.trim().is_empty() {
+        return Err(SearchError::InvalidQuery);
+    }
+
+    let mut query = params.query.clone();
+    for domain in params.exclude_domains.iter().flatten() {
+        query.push_str(&format!(" -site:{domain}"));
+    }
+
+    Ok(SearchRequest {
+        query,
+        count: Some(params.max_results.unwrap_or(DEFAULT_COUNT)),
+        offset: Some(offset),
+    })
+}
+
+/// Converts a Brave response into unified results and session metadata.
+///
+/// `current_offset` is the page index that produced `response`.
+pub fn response_to_results(
+    response: SearchResponse,
+    original_params: &SearchParams,
+    current_offset: u32,
+) -> (Vec<SearchResult>, SearchMetadata) {
+    let results: Vec<SearchResult> = response
+        .web
+        .iter()
+        .flat_map(|web| web.results.iter())
+        .enumerate()
+        .map(|(index, item)| web_result_to_search_result(item, index))
+        .collect();
+
+    let metadata = create_search_metadata(&response, original_params, current_offset);
+    (results, metadata)
+}
+
+/// Maps one Brave web result to a `SearchResult`; `index` is its position on the page.
+fn web_result_to_search_result(item: &WebResult, index: usize) -> SearchResult {
+    // The description, when non-empty, is the only content chunk.
+    let content_chunks = if item.description.is_empty() {
+        None
+    } else {
+        Some(vec![item.description.clone()])
+    };
+
+    // Position-based score: 1.0 for the first hit, 0.05 less per position,
+    // clamped to [0.0, 1.0].
+    let score = (1.0 - (index as f32) * 0.05).clamp(0.0, 1.0);
+    let domain = extract_domain(&item.url);
+
+    SearchResult {
+        title: item.title.clone(),
+        url: item.url.clone(),
+        snippet: item.description.clone(),
+        display_url: domain.clone(),
+        source: domain,
+        score: Some(score as f64),
+        html_snippet: None,
+        date_published: item.date.clone(),
+        images: None,
+        content_chunks,
+    }
+}
+
+/// Returns the host of `url` without a leading `www.`, or `None` when `url`
+/// cannot be parsed or has no host.
+fn extract_domain(url: &str) -> Option<String> {
+    let parsed = url::Url::parse(url).ok()?;
+    let host = parsed.host_str()?;
+    Some(host.strip_prefix("www.").unwrap_or(host).to_string())
+}
+
+/// Builds the metadata for the page fetched at page index `current_offset`.
+fn create_search_metadata(
+    response: &SearchResponse,
+    params: &SearchParams,
+    current_offset: u32,
+) -> SearchMetadata {
+    let requested_count = params.max_results.unwrap_or(DEFAULT_COUNT) as usize;
+    let returned_count = response.web.as_ref().map_or(0, |web| web.results.len());
+
+    // A completely filled page is taken as the sign that more results exist.
+    let page_is_full = response.web.is_some() && returned_count == requested_count;
+
+    // Brave's `offset` counts pages, not results, so the next page is
+    // `current_offset + 1`; no token is issued past the last accepted page.
+    let next_page_token = if page_is_full && current_offset < MAX_PAGE_OFFSET {
+        Some((current_offset + 1).to_string())
+    } else {
+        None
+    };
+
+    // No total is read from the response: report a fixed placeholder for a
+    // full page and the exact number of returned results otherwise.
+    let total_results = if response.web.is_some() && returned_count >= requested_count {
+        Some(100000u64)
+    } else {
+        Some(returned_count as u64)
+    };
+
+    SearchMetadata {
+        query: params.query.clone(),
+        total_results,
+        search_time_ms: None,
+        safe_search: params.safe_search,
+        language: params.language.clone(),
+        region: params.region.clone(),
+        next_page_token,
+        rate_limits: None,
+        current_page: current_offset,
+    }
+}
+
+/// Rejects parameters that this Brave integration cannot honor.
+///
+/// Fails with `InvalidQuery` for a blank query and with `UnsupportedFeature` for
+/// `max_results` above 20 or when images, HTML, or an advanced answer are requested.
+pub fn validate_search_params(params: &SearchParams) -> Result<(), SearchError> {
+    if params.query.trim().is_empty() {
+        return Err(SearchError::InvalidQuery);
+    }
+    if let Some(max_results) = params.max_results {
+        if max_results > 20 {
+            return Err(SearchError::UnsupportedFeature(
+                "max_results cannot exceed 20 for Brave Search".to_string(),
+            ));
+        }
+    }
+    if params.include_images == Some(true) {
+        return Err(SearchError::UnsupportedFeature(
+            "include-images not supported".to_string(),
+        ));
+    }
+    if params.include_html == Some(true) {
+        return Err(SearchError::UnsupportedFeature(
+            "include-html not supported".to_string(),
+        ));
+    }
+    if params.advanced_answer == Some(true) {
+        return Err(SearchError::UnsupportedFeature(
+            "advanced-answer not supported".to_string(),
+        ));
+    }
+    Ok(())
+}
diff --git a/websearch/brave/src/lib.rs b/websearch/brave/src/lib.rs
new file mode 100644
index 000000000..c23a7a468
--- /dev/null
+++ b/websearch/brave/src/lib.rs
@@ -0,0 +1,191 @@
+mod client;
+mod conversions;
+
+use std::cell::RefCell;
+
+use crate::client::{BraveSearchApi, SearchRequest};
+use crate::conversions::{params_to_request, response_to_results, validate_search_params};
+use golem_web_search::durability::Durablewebsearch;
+use golem_web_search::durability::ExtendedwebsearchGuest;
+use golem_web_search::golem::web_search::web_search::{
+    Guest, GuestSearchSession, SearchError, SearchMetadata, SearchParams, SearchResult,
+    SearchSession,
+};
+
+use golem_web_search::LOGGING_STATE;
+
+/// Session state produced by `session_to_state` and consumed by `session_from_state`.
+// NOTE(review): the API key is part of this state; confirm that storing the secret
+// wherever the durability layer keeps replay state is acceptable.
+#[derive(Debug, Clone, PartialEq, golem_rust::FromValueAndType, golem_rust::IntoValue)]
+pub struct BraveReplayState {
+    pub api_key: String,
+    pub current_offset: u32,
+    pub metadata: Option<SearchMetadata>,
+    pub finished: bool,
+}
+
+/// Paging state of one Brave search.
+struct BraveSearch {
+    client: BraveSearchApi,
+    request: SearchRequest,
+    params: SearchParams,
+    metadata: Option<SearchMetadata>,
+    /// Page index sent as `offset` by the next fetch.
+    current_offset: u32,
+    /// True once a fetched page came back without a next-page token.
+    finished: bool,
+}
+
+impl BraveSearch {
+    fn new(client: BraveSearchApi, request: SearchRequest, params: SearchParams) -> Self {
+        Self {
+            client,
+            request,
+            params,
+            metadata: None,
+            current_offset: 0,
+            finished: false,
+        }
+    }
+
+    /// Fetches the page at `current_offset`, then advances to the next page index.
+    ///
+    /// Returns the page's results and whether the search is finished. Once it is
+    /// finished, further calls return an empty page without sending a request.
+    fn next_page(&mut self) -> Result<(Vec<SearchResult>, bool), SearchError> {
+        if self.finished {
+            return Ok((Vec::new(), true));
+        }
+
+        let mut request = self.request.clone();
+        request.offset = Some(self.current_offset);
+
+        let response = self.client.search(request)?;
+        let (results, metadata) = response_to_results(response, &self.params, self.current_offset);
+
+        self.current_offset += 1;
+        // `response_to_results` issues a token only when another page can be requested.
+        self.finished = metadata.next_page_token.is_none();
+        self.metadata = Some(metadata);
+        Ok((results, self.finished))
+    }
+
+    fn get_metadata(&self) -> Option<SearchMetadata> {
+        self.metadata.clone()
+    }
+}
+
+/// Interior-mutable wrapper so the `&self` session methods can advance the search.
+struct BraveSearchSession(RefCell<BraveSearch>);
+
+impl BraveSearchSession {
+    fn new(search: BraveSearch) -> Self {
+        Self(RefCell::new(search))
+    }
+}
+
+impl GuestSearchSession for BraveSearchSession {
+    fn next_page(&self) -> Result<Vec<SearchResult>, SearchError> {
+        self.0.borrow_mut().next_page().map(|(results, _)| results)
+    }
+
+    fn get_metadata(&self) -> Option<SearchMetadata> {
+        self.0.borrow().get_metadata()
+    }
+}
+
+struct BraveSearchComponent;
+
+impl BraveSearchComponent {
+    const API_KEY_VAR: &'static str = "BRAVE_API_KEY";
+
+    /// Reads the API key from the `BRAVE_API_KEY` environment variable.
+    fn get_api_key() -> Result<String, SearchError> {
+        std::env::var(Self::API_KEY_VAR).map_err(|_| {
+            SearchError::BackendError("BRAVE_API_KEY environment variable not set".to_string())
+        })
+    }
+
+    /// Validates `params` and fetches the first page of results using `api_key`.
+    fn execute_search(
+        params: SearchParams,
+        api_key: String,
+    ) -> Result<(Vec<SearchResult>, SearchMetadata), SearchError> {
+        validate_search_params(&params)?;
+
+        let client = BraveSearchApi::new(api_key);
+        let request = params_to_request(params.clone(), 0)?;
+        let response = client.search(request)?;
+
+        Ok(response_to_results(response, &params, 0))
+    }
+
+    /// Validates `params` and builds a session at page 0 without fetching anything.
+    fn start_search_session(
+        params: SearchParams,
+        api_key: String,
+    ) -> Result<BraveSearchSession, SearchError> {
+        validate_search_params(&params)?;
+
+        let client = BraveSearchApi::new(api_key);
+        let request = params_to_request(params.clone(), 0)?;
+
+        Ok(BraveSearchSession::new(BraveSearch::new(client, request, params)))
+    }
+}
+
+impl Guest for BraveSearchComponent {
+    type SearchSession = BraveSearchSession;
+
+    fn start_search(params: SearchParams) -> Result<SearchSession, SearchError> {
+        LOGGING_STATE.with_borrow_mut(|state| state.init());
+        let session = Self::start_search_session(params, Self::get_api_key()?)?;
+        Ok(SearchSession::new(session))
+    }
+
+    fn search_once(
+        params: SearchParams,
+    ) -> Result<(Vec<SearchResult>, Option<SearchMetadata>), SearchError> {
+        LOGGING_STATE.with_borrow_mut(|state| state.init());
+        let (results, metadata) = Self::execute_search(params, Self::get_api_key()?)?;
+        Ok((results, Some(metadata)))
+    }
+}
+
+impl ExtendedwebsearchGuest for BraveSearchComponent {
+    type ReplayState = BraveReplayState;
+
+    /// Creates the raw session; validates `params` exactly like `start_search`.
+    fn unwrapped_search_session(params: SearchParams) -> Result<Self::SearchSession, SearchError> {
+        Self::start_search_session(params, Self::get_api_key()?)
+    }
+
+    /// Snapshots the session without fetching anything or advancing it.
+    fn session_to_state(session: &Self::SearchSession) -> Self::ReplayState {
+        let search = session.0.borrow();
+        BraveReplayState {
+            api_key: search.client.api_key().clone(),
+            current_offset: search.current_offset,
+            metadata: search.metadata.clone(),
+            finished: search.finished,
+        }
+    }
+
+    /// Rebuilds a session from `state` without sending any request.
+    fn session_from_state(
+        state: &Self::ReplayState,
+        params: SearchParams,
+    ) -> Result<Self::SearchSession, SearchError> {
+        let client = BraveSearchApi::new(state.api_key.clone());
+        let request = params_to_request(params.clone(), 0)?;
+        let mut search = BraveSearch::new(client, request, params);
+        search.current_offset = state.current_offset;
+        search.metadata = state.metadata.clone();
+        search.finished = state.finished;
+        Ok(BraveSearchSession::new(search))
+    }
+}
+
+type DurableBraveComponent = Durablewebsearch<BraveSearchComponent>;
+golem_web_search::export_websearch!(DurableBraveComponent with_types_in golem_web_search); diff --git a/websearch/brave/wit/brave.wit b/websearch/brave/wit/brave.wit new file mode 100644 index 000000000..c3f54c648 --- /dev/null +++ b/websearch/brave/wit/brave.wit @@ -0,0 +1,6 @@ +package golem:web-search-brave@1.0.0; + +world websearch-library { + export golem:web-search/web-search@1.0.0; + export golem:web-search/types@1.0.0; +} diff --git a/websearch/brave/wit/deps/golem-web-search/golem-web-search.wit b/websearch/brave/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..550ee50f0 --- /dev/null +++ b/websearch/brave/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + 
/// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file diff --git a/websearch/brave/wit/deps/wasi:io/error.wit b/websearch/brave/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/websearch/brave/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. 
This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/websearch/brave/wit/deps/wasi:io/poll.wit b/websearch/brave/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/websearch/brave/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. 
+ /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/websearch/brave/wit/deps/wasi:io/streams.wit b/websearch/brave/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/websearch/brave/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. 
I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. 
Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. 
+ /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. 
+ /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. 
calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. + @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/websearch/brave/wit/deps/wasi:io/world.wit b/websearch/brave/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/websearch/brave/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/websearch/google/Cargo.toml b/websearch/google/Cargo.toml new file mode 100644 index 000000000..5a9aa266b --- /dev/null +++ b/websearch/google/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "golem-web-search-google" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for querying Google Search APIs via the golem:web-search interface" + +[lib] +crate-type = ["cdylib"] +path = "src/lib.rs" + +[features] +default = ["durability"] +durability = 
["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../websearch", version = "0.0.0", default-features = false } +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } +url = "2.5" +urlencoding = "2.1" + +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasm-bindgen = "0.2" + +[profile.release] +opt-level = "s" +lto = true +codegen-units = 1 +panic = "abort" + +[package.metadata.component] +package = "golem:web-search-google" + +[package.metadata.component.bindings] +generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +"golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" + + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:websearch" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io" } diff --git a/websearch/google/src/bindings.rs b/websearch/google/src/bindings.rs new file mode 100644 index 000000000..759cbb8c5 --- /dev/null +++ b/websearch/google/src/bindings.rs @@ -0,0 +1,49 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! 
+// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +// * with "golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" +// * generate_unused_types +use golem_websearch::golem::websearch::types as __with_name0; +use golem_websearch::golem::websearch::websearch as __with_name1; +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-google@1.0.0:websearch-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1389] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xe5\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x09\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x0ccurrent-pagey\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05\ +month\x04year\x04\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05\ +querys\x0bsafe-search\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-r\ +ange\x16\x0finclude-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0c\ +include-html\x17\x0fadvanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\ +\x0dinvalid-query\0\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dba\ +ckend-error\x01s\0\x04\0\x0csearch-error\x03\0\x1a\x04\0\x1cgolem:web-search/typ\ 
+es@1.0.0\x05\0\x02\x03\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\ +\0\0\x0fsearch-metadata\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\ +\x04\0\x0dsearch-params\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\ +\x02\x02\x03\x02\x01\x03\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\ +\0\x0csearch-error\x03\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01p\x03\x01\ +j\x01\x0a\x01\x07\x01@\x01\x04self\x09\0\x0b\x04\0\x20[method]search-session.nex\ +t-page\x01\x0c\x01k\x05\x01@\x01\x04self\x09\0\x0d\x04\0#[method]search-session.\ +get-metadata\x01\x0e\x01i\x08\x01j\x01\x0f\x01\x07\x01@\x01\x06params\x01\0\x10\x04\ +\0\x0cstart-search\x01\x11\x01o\x02\x0a\x0d\x01j\x01\x12\x01\x07\x01@\x01\x06par\ +ams\x01\0\x13\x04\0\x0bsearch-once\x01\x14\x04\0!golem:web-search/web-search@1.0\ +.0\x05\x05\x04\0/golem:web-search-google/websearch-library@1.0.0\x04\0\x0b\x17\x01\ +\0\x11websearch-library\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-c\ +omponent\x070.227.1\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/websearch/google/src/client.rs b/websearch/google/src/client.rs new file mode 100644 index 000000000..7c06e94c5 --- /dev/null +++ b/websearch/google/src/client.rs @@ -0,0 +1,275 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::web_search::SearchError; +use log::trace; +use reqwest::Url; +use reqwest::{Client, Method, Response}; +use serde::{Deserialize, Serialize}; + +const BASE_URL: &str = "https://www.googleapis.com/customsearch/v1"; + +/// Google Custom Search API client for web search. 
+pub struct GoogleSearchApi { + client: Client, + pub api_key: String, + pub search_engine_id: String, +} + +impl GoogleSearchApi { + pub fn new(api_key: String, search_engine_id: String) -> Self { + let client = Client::builder() + .user_agent("Golem-Web-Search/1.0") + .build() + .expect("Failed to initialize HTTP client"); + + Self { + client, + api_key, + search_engine_id, + } + } + + pub fn search(&self, request: SearchRequest) -> Result<SearchResponse, SearchError> { + trace!("Sending request to Google Custom Search API: {request:?}"); + + let mut url = Url::parse(BASE_URL).expect("Invalid base URL"); + { + let mut query_pairs = url.query_pairs_mut(); // the serializer percent-encodes every key and value it is given + query_pairs.append_pair("key", &self.api_key); + query_pairs.append_pair("cx", &self.search_engine_id); + query_pairs.append_pair("q", &request.query); // raw text on purpose: pre-encoding would be encoded again (" " -> "%2520") + if let Some(num) = request.max_results { + query_pairs.append_pair("num", &num.to_string()); + } + if let Some(start) = request.start { + query_pairs.append_pair("start", &start.to_string()); + } + if let Some(safe) = &request.safe { + query_pairs.append_pair("safe", safe); + } + if let Some(lr) = &request.lr { + query_pairs.append_pair("lr", lr); + } + if let Some(gl) = &request.gl { + query_pairs.append_pair("gl", gl); + } + if let Some(date_restrict) = &request.date_restrict { + query_pairs.append_pair("dateRestrict", date_restrict); + } + if let Some(site_search) = &request.site_search { + query_pairs.append_pair("siteSearch", site_search); // raw value, same reason as "q" + } + if let Some(site_search_filter) = &request.site_search_filter { + query_pairs.append_pair("siteSearchFilter", site_search_filter); + } + if request.img_type.is_some() || request.img_size.is_some() { + query_pairs.append_pair("searchType", "image"); // any image option switches the request to image search + if let Some(img_type) = &request.img_type { + query_pairs.append_pair("imgType", img_type); + } + if let Some(img_size) = &request.img_size { + query_pairs.append_pair("imgSize", img_size); + } + } + } + let response = self + .client +
.request(Method::GET, url.as_str()) + .send() + .map_err(|err| from_reqwest_error("Failed to send request", err))?; + + parse_response(response) + } + + pub fn api_key(&self) -> &String { + &self.api_key + } + + pub fn search_engine_id(&self) -> &String { + &self.search_engine_id + } +} + +#[derive(Debug, Clone)] +pub struct SearchRequest { + pub query: String, + pub max_results: Option<u32>, + pub start: Option<u32>, + pub safe: Option<String>, + pub lr: Option<String>, + pub gl: Option<String>, + pub date_restrict: Option<String>, + pub site_search: Option<String>, + pub site_search_filter: Option<String>, + pub img_type: Option<String>, + pub img_size: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResponse { + pub query: String, + pub response_time: f32, + pub total_results: Option<u64>, + pub results: Vec<SearchResult>, + pub next_page: Option<NextPage>, + pub previous_page: Option<PreviousPage>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + pub title: String, + pub url: String, + pub content: String, + pub published_date: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct GoogleApiResponse { + pub kind: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub queries: Option<GoogleSearchQueries>, + #[serde(rename = "searchInformation", skip_serializing_if = "Option::is_none")] + pub search_information: Option<GoogleSearchInformation>, // wire key is camelCase; without the rename this is always None + #[serde(skip_serializing_if = "Option::is_none")] + pub items: Option<Vec<GoogleSearchItem>>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct GoogleSearchQueries { + #[serde(skip_serializing_if = "Option::is_none")] + pub request: Option<Vec<GoogleQueryInfo>>, + #[serde(rename = "nextPage", skip_serializing_if = "Option::is_none")] + pub next_page: Option<Vec<NextPage>>, // wire key is "nextPage"; pagination depends on it + #[serde(rename = "previousPage", skip_serializing_if = "Option::is_none")] + pub previous_page: Option<Vec<PreviousPage>>, // wire key is "previousPage" +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct GoogleQueryInfo { + #[serde(rename = "searchTerms")] + pub search_terms: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct GoogleSearchInformation { + #[serde(rename = "searchTime")] + pub search_time: f64, + #[serde(rename = "totalResults")] +
pub total_results: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct GoogleSearchItem { + pub title: String, + pub link: String, + #[serde(default)] pub snippet: String, // NOTE(review): some items appear to come back without a snippet; default to "" instead of failing the whole page +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NextPage { + #[serde(rename = "startIndex")] + pub start_index: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PreviousPage { + #[serde(rename = "startIndex")] + pub start_index: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ErrorResponse { + pub error: ErrorResponseDetails, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ErrorResponseDetails { + pub code: u32, + pub message: String, +} + +fn parse_response(response: Response) -> Result<SearchResponse, SearchError> { + let status = response.status(); + if status.is_success() { + let google_response: GoogleApiResponse = response + .json() + .map_err(|err| from_reqwest_error("Failed to decode response body", err))?; + + trace!("Received response from Google Custom Search API: {google_response:?}"); + + // Convert Google response + let query = google_response + .queries + .as_ref() + .and_then(|q| q.request.as_ref()) + .and_then(|r| r.first().map(|qi| qi.search_terms.clone())) + .unwrap_or_default(); + + let response_time = google_response + .search_information + .as_ref() + .map(|info| info.search_time as f32) + .unwrap_or(0.0); + + let total_results = google_response + .search_information + .and_then(|info| info.total_results.parse::<u64>().ok()); // totalResults arrives as a decimal string + + let next_page = google_response + .queries + .as_ref() + .and_then(|q| q.next_page.as_ref()) + .and_then(|np| np.first().cloned()); + + let previous_page = google_response + .queries + .and_then(|q| q.previous_page) + .and_then(|pp| pp.first().cloned()); + + let results = google_response + .items + .unwrap_or_default() + .into_iter() + .map(|item| SearchResult { + title: item.title, + url: item.link, + content: item.snippet, + published_date: None, // Google doesn't provide this in basic search + }) +
.collect(); + + Ok(SearchResponse { + query, + response_time, + total_results, + results, + next_page, + previous_page, + }) + } else { + // Try to parse error response + match response.json::<ErrorResponse>() { + Ok(error_body) => { + trace!("Received {status} response from Google Custom Search API: {error_body:?}"); + + let search_error = match error_body.error.code { + 400 => SearchError::InvalidQuery, + 401 => SearchError::BackendError("Invalid API key".to_string()), + 403 => SearchError::BackendError("API key quota exceeded".to_string()), + 429 => SearchError::RateLimited(60), // Default to 60 seconds + _ => SearchError::BackendError(format!( + "Request failed with {}: {}", + status, error_body.error.message + )), + }; + + Err(search_error) + } + Err(_) => { + // Fallback for non-JSON error responses + Err(SearchError::BackendError(format!( + "Request failed with status {status}" + ))) + } + } + } +} diff --git a/websearch/google/src/conversions.rs b/websearch/google/src/conversions.rs new file mode 100644 index 000000000..3f7880601 --- /dev/null +++ b/websearch/google/src/conversions.rs @@ -0,0 +1,175 @@ +use crate::client::{SearchRequest, SearchResponse, SearchResult as ClientSearchResult}; +use golem_web_search::golem::web_search::types::SafeSearchLevel; +use golem_web_search::golem::web_search::web_search::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; + +pub fn params_to_request(params: SearchParams, start: u32) -> Result<SearchRequest, SearchError> { + // Validate query + if params.query.trim().is_empty() { + return Err(SearchError::InvalidQuery); + } + + // Handle domain filtering in query + let mut query = params.query.clone(); + + // Add included domains + if let Some(include_domains) = &params.include_domains { + if !include_domains.is_empty() { + let site_filter = include_domains + .iter() + .map(|domain| format!("site:{domain}")) + .collect::<Vec<_>>() + .join(" OR "); + query = format!("({site_filter}) {query}"); + } + } + + // Add excluded domains + if let Some(exclude_domains) =
&params.exclude_domains { + for domain in exclude_domains { + query.push_str(&format!(" -site:{domain}")); + } + } + + Ok(SearchRequest { + query, + max_results: params.max_results.map(|n| n.clamp(1, 10)), // Google's `num` only accepts 1..=10 per request + start: Some(start), + safe: params.safe_search.map(|safe| match safe { + SafeSearchLevel::Off => "off".to_string(), + SafeSearchLevel::Medium => "active".to_string(), // the API only knows "active"/"off"; "medium" is not an accepted value + SafeSearchLevel::High => "active".to_string(), + }), + lr: params.language.clone(), + gl: params.region.clone(), + date_restrict: None, + site_search: None, + site_search_filter: None, + img_type: None, + img_size: None, + }) +} + +pub fn response_to_results( + response: SearchResponse, + original_params: &SearchParams, + current_start: u32, +) -> (Vec<SearchResult>, SearchMetadata) { + let mut results = Vec::new(); + + // Process web results - note: SearchResponse.results, not SearchResponse.web + for (index, item) in response.results.iter().enumerate() { + results.push(web_result_to_search_result(item, index)); + } + + let metadata = create_search_metadata(&response, original_params, current_start); + (results, metadata) +} + +fn web_result_to_search_result(item: &ClientSearchResult, index: usize) -> SearchResult { + let mut content_chunks = None; + + // Create content chunks from content + let mut chunks = Vec::new(); + if !item.content.is_empty() { + chunks.push(item.content.clone()); + } + + if !chunks.is_empty() { + content_chunks = Some(chunks); + } + + // Simple position-based scoring + let score = 1.0 - (index as f32) * 0.05; + + SearchResult { + title: item.title.clone(), + url: item.url.clone(), + snippet: item.content.clone(), + display_url: extract_domain(&item.url), + source: extract_domain(&item.url), + score: Some(score.clamp(0.0, 1.0) as f64), + html_snippet: None, + date_published: item.published_date.clone(), + images: None, + content_chunks, + } +} + +fn extract_domain(url: &str) -> Option<String> { + if let Ok(parsed_url) = url::Url::parse(url) { + parsed_url.host_str().map(|host| { + // Remove www.
prefix if present + if let Some(stripped) = host.strip_prefix("www.") { + stripped.to_string() + } else { + host.to_string() + } + }) + } else { + None + } +} + +fn create_search_metadata( + response: &SearchResponse, + params: &SearchParams, + current_start: u32, +) -> SearchMetadata { + // Create next page token if more results are available + let next_page_token = response + .next_page + .as_ref() + .map(|p| p.start_index.to_string()); + + // Use the actual total_results from the response + let total_results = response.total_results.or_else(|| { + if response.results.len() >= (params.max_results.unwrap_or(10) as usize) { + Some(100000u64) // Conservative estimate + } else { + Some(response.results.len() as u64) + } + }); + + SearchMetadata { + query: params.query.clone(), + total_results, + search_time_ms: Some(f64::from(response.response_time) * 1000.0), // Google reports searchTime in seconds; the field is milliseconds + safe_search: params.safe_search, + language: params.language.clone(), + region: params.region.clone(), + next_page_token, + rate_limits: None, + current_page: current_start, + } +} + +pub fn validate_search_params(params: &SearchParams) -> Result<(), SearchError> { + if params.query.trim().is_empty() { + return Err(SearchError::InvalidQuery); + } + if let Some(max_results) = params.max_results { + if max_results > 100 { + return Err(SearchError::UnsupportedFeature( + "max_results cannot exceed 100 for Google Custom Search".to_string(), + )); + } + } + if params.time_range.is_some() { + return Err(SearchError::UnsupportedFeature( + "time-range not supported".to_string(), + )); + } + if params.include_html == Some(true) { + return Err(SearchError::UnsupportedFeature( + "include-html not supported".to_string(), + )); + } + if params.advanced_answer == Some(true) { + return Err(SearchError::UnsupportedFeature( + "advanced-answer not supported".to_string(), + )); + } + Ok(()) +} diff --git a/websearch/google/src/lib.rs b/websearch/google/src/lib.rs new file mode 100644 index 000000000..1bbac0b06 --- /dev/null +++
b/websearch/google/src/lib.rs @@ -0,0 +1,205 @@ +mod client; +mod conversions; + +use std::cell::RefCell; + +use crate::client::{GoogleSearchApi, NextPage, SearchRequest}; +use crate::conversions::{params_to_request, response_to_results, validate_search_params}; +use golem_web_search::durability::Durablewebsearch; +use golem_web_search::durability::ExtendedwebsearchGuest; +use golem_web_search::golem::web_search::web_search::{ + Guest, GuestSearchSession, SearchError, SearchMetadata, SearchParams, SearchResult, + SearchSession, +}; + +use golem_web_search::LOGGING_STATE; + +#[derive(Debug, Clone, PartialEq, golem_rust::FromValueAndType, golem_rust::IntoValue)] +pub struct GoogleReplayState { + pub api_key: String, + pub search_engine_id: String, + pub next_page_token: Option<String>, + pub metadata: Option<SearchMetadata>, + pub finished: bool, +} + +struct GoogleSearch { + client: GoogleSearchApi, + request: SearchRequest, + params: SearchParams, + metadata: Option<SearchMetadata>, + next_page: Option<NextPage>, +} + +impl GoogleSearch { + fn new(client: GoogleSearchApi, request: SearchRequest, params: SearchParams) -> Self { + Self { + client, + request, + params, + metadata: None, + next_page: None, + } + } + + fn next_page(&mut self) -> Result<(Vec<SearchResult>, bool), SearchError> { + // Metadata is only set after a successful fetch; if it is set and no next page + // was recorded, the result set is exhausted, so do not restart from result 1. + if self.metadata.is_some() && self.next_page.is_none() { + return Ok((Vec::new(), true)); + } + + // Update request with current start index + let mut request = self.request.clone(); + let current_start = if let Some(next_page) = &self.next_page { + request.start = Some(next_page.start_index); + next_page.start_index + } else { + 1 + }; + + let response = self.client.search(request)?; + let (results, metadata) = + response_to_results(response.clone(), &self.params, current_start); + + let finished = response.next_page.is_none(); + self.next_page = response.next_page; + self.metadata = Some(metadata); + Ok((results, finished)) + } + + fn get_metadata(&self) -> Option<SearchMetadata> { + self.metadata.clone() + } +} + +// Create a wrapper that implements GuestSearchSession properly +struct GoogleSearchSession(RefCell<GoogleSearch>); + +impl GoogleSearchSession { + fn
new(search: GoogleSearch) -> Self { + Self(RefCell::new(search)) + } +} + +impl GuestSearchSession for GoogleSearchSession { + fn next_page(&self) -> Result<Vec<SearchResult>, SearchError> { + let mut search = self.0.borrow_mut(); + search.next_page().map(|(results, _)| results) // the "finished" flag is not part of the WIT signature + } + + fn get_metadata(&self) -> Option<SearchMetadata> { + let search = self.0.borrow(); + search.get_metadata() + } +} + +struct GoogleCustomSearchComponent; + +impl GoogleCustomSearchComponent { + const API_KEY_VAR: &'static str = "GOOGLE_API_KEY"; + const SEARCH_ENGINE_ID_VAR: &'static str = "GOOGLE_SEARCH_ENGINE_ID"; + + fn create_client() -> Result<GoogleSearchApi, SearchError> { + let api_key = std::env::var(Self::API_KEY_VAR).map_err(|_| { + SearchError::BackendError("GOOGLE_API_KEY environment variable not set".to_string()) + })?; + + let search_engine_id = std::env::var(Self::SEARCH_ENGINE_ID_VAR).map_err(|_| { + SearchError::BackendError( + "GOOGLE_SEARCH_ENGINE_ID environment variable not set".to_string(), + ) + })?; + + Ok(GoogleSearchApi::new(api_key, search_engine_id)) + } + + fn execute_search( + params: SearchParams, + ) -> Result<(Vec<SearchResult>, Option<SearchMetadata>), SearchError> { + validate_search_params(&params)?; + + let client = Self::create_client()?; + let request = params_to_request(params.clone(), 1)?; // one-shot search always starts at result 1 + + let response = client.search(request)?; + let (results, metadata) = response_to_results(response, &params, 1); + + Ok((results, Some(metadata))) + } + + fn start_search_session(params: SearchParams) -> Result<GoogleSearchSession, SearchError> { + validate_search_params(&params)?; + + let client = Self::create_client()?; + let request = params_to_request(params.clone(), 1)?; + + let search = GoogleSearch::new(client, request, params); + Ok(GoogleSearchSession::new(search)) + } +} + +impl Guest for GoogleCustomSearchComponent { + type SearchSession = GoogleSearchSession; + + fn start_search(params: SearchParams) -> Result<SearchSession, SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + match Self::start_search_session(params) { + Ok(session) => Ok(SearchSession::new(session)), + Err(err) =>
Err(err), + } + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec<SearchResult>, Option<SearchMetadata>), SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + Self::execute_search(params) + } +} + +impl ExtendedwebsearchGuest for GoogleCustomSearchComponent { + type ReplayState = GoogleReplayState; + + fn unwrapped_search_session(params: SearchParams) -> Result<GoogleSearchSession, SearchError> { + let client = Self::create_client()?; + let request = crate::conversions::params_to_request(params.clone(), 1)?; + let search = GoogleSearch::new(client, request, params); + Ok(GoogleSearchSession::new(search)) + } + + fn session_to_state(session: &Self::SearchSession) -> Self::ReplayState { + let mut search = session.0.borrow_mut(); + let (_, finished) = search.next_page().unwrap_or_else(|_| (vec![], true)); // NOTE(review): snapshotting fetches (and discards) a page and treats any error as "finished" - confirm this is intended + GoogleReplayState { + api_key: search.client.api_key().to_string(), // NOTE(review): the API key is copied into the replay state - confirm that is acceptable + search_engine_id: search.client.search_engine_id().to_string(), + next_page_token: search.next_page.as_ref().map(|p| p.start_index.to_string()), + metadata: search.metadata.clone(), + finished, + } + } + + fn session_from_state( + state: &Self::ReplayState, + params: SearchParams, + ) -> Result<GoogleSearchSession, SearchError> { + let client = GoogleSearchApi::new(state.api_key.clone(), state.search_engine_id.clone()); + let request = crate::conversions::params_to_request(params.clone(), 1)?; + let mut search = GoogleSearch::new(client, request, params); + search.next_page = state + .next_page_token + .as_ref() + .and_then(|t| t.parse().ok()) + .map(|start_index| NextPage { start_index }); + search.metadata = state.metadata.clone(); + if state.finished { + let _ = search.next_page(); + } + + Ok(GoogleSearchSession::new(search)) + } +} + +type DurableGoogleComponent = Durablewebsearch<GoogleCustomSearchComponent>; +golem_web_search::export_websearch!(DurableGoogleComponent with_types_in golem_web_search); diff --git a/websearch/google/wit/deps/golem-web-search/golem-web-search.wit b/websearch/google/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index
000000000..550ee50f0 --- /dev/null +++ b/websearch/google/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: 
func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file diff --git a/websearch/google/wit/deps/wasi:io/error.wit b/websearch/google/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/websearch/google/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. 
+ @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/websearch/google/wit/deps/wasi:io/poll.wit b/websearch/google/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/websearch/google/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. 
+ @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/websearch/google/wit/deps/wasi:io/streams.wit b/websearch/google/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/websearch/google/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. 
When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. 
+ len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. 
+ /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. 
the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/websearch/google/wit/deps/wasi:io/world.wit b/websearch/google/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/websearch/google/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/websearch/google/wit/google.wit b/websearch/google/wit/google.wit new file mode 100644 index 000000000..2f7ae604b --- /dev/null +++ b/websearch/google/wit/google.wit @@ -0,0 +1,6 @@ +package golem:web-search-google@1.0.0; + +world websearch-library { + export golem:web-search/web-search@1.0.0; + export golem:web-search/types@1.0.0; +} \ No newline at end of file diff --git a/websearch/serper/Cargo.toml b/websearch/serper/Cargo.toml new file mode 100644 index 000000000..ca1cdcd59 --- /dev/null +++ b/websearch/serper/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "golem-web-search-serper" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for querying Serper APIs via the golem:web-search interface" + +[lib] +crate-type = ["cdylib"] +path = "src/lib.rs" + +[features] +default = ["durability"] +durability = ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../websearch", version = "0.0.0", default-features = false } +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } +url = "2.5" + 
+[target.'cfg(target_arch = "wasm32")'.dependencies] +wasm-bindgen = "0.2" + +[profile.release] +opt-level = "s" +lto = true +codegen-units = 1 +panic = "abort" + +[package.metadata.component] +package = "golem:web-search-serper" + +[package.metadata.component.bindings] +generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +"golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" + + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:websearch" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io" } diff --git a/websearch/serper/src/bindings.rs b/websearch/serper/src/bindings.rs new file mode 100644 index 000000000..294126b5c --- /dev/null +++ b/websearch/serper/src/bindings.rs @@ -0,0 +1,49 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! +// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" +// * with "golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +// * generate_unused_types +use golem_websearch::golem::websearch::types as __with_name0; +use golem_websearch::golem::websearch::websearch as __with_name1; +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-serper@1.0.0:websearch-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1389] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xe5\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ 
+ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x09\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x0ccurrent-pagey\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05\ +month\x04year\x04\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05\ +querys\x0bsafe-search\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-r\ +ange\x16\x0finclude-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0c\ +include-html\x17\x0fadvanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\ +\x0dinvalid-query\0\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dba\ +ckend-error\x01s\0\x04\0\x0csearch-error\x03\0\x1a\x04\0\x1cgolem:web-search/typ\ +es@1.0.0\x05\0\x02\x03\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\ +\0\0\x0fsearch-metadata\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\ +\x04\0\x0dsearch-params\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\ +\x02\x02\x03\x02\x01\x03\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\ +\0\x0csearch-error\x03\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01p\x03\x01\ +j\x01\x0a\x01\x07\x01@\x01\x04self\x09\0\x0b\x04\0\x20[method]search-session.nex\ +t-page\x01\x0c\x01k\x05\x01@\x01\x04self\x09\0\x0d\x04\0#[method]search-session.\ +get-metadata\x01\x0e\x01i\x08\x01j\x01\x0f\x01\x07\x01@\x01\x06params\x01\0\x10\x04\ +\0\x0cstart-search\x01\x11\x01o\x02\x0a\x0d\x01j\x01\x12\x01\x07\x01@\x01\x06par\ +ams\x01\0\x13\x04\0\x0bsearch-once\x01\x14\x04\0!golem:web-search/web-search@1.0\ +.0\x05\x05\x04\0/golem:web-search-serper/websearch-library@1.0.0\x04\0\x0b\x17\x01\ 
+\0\x11websearch-library\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-c\ +omponent\x070.227.1\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/websearch/serper/src/client.rs b/websearch/serper/src/client.rs new file mode 100644 index 000000000..7df6101a9 --- /dev/null +++ b/websearch/serper/src/client.rs @@ -0,0 +1,126 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::web_search::SearchError; +use log::trace; +use reqwest::Method; +use reqwest::{Client, Response}; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use std::fmt::Debug; + +const BASE_URL: &str = "https://google.serper.dev/search"; + +/// The Serper Search API client for Google-powered web search. +pub struct SerperSearchApi { + pub api_key: String, + client: Client, +} + +impl SerperSearchApi { + pub fn new(api_key: String) -> Self { + let client = Client::builder() + .user_agent("Golem-Web-Search/1.0") + .build() + .expect("Failed to initialize HTTP client"); + + Self { api_key, client } + } + + pub fn search(&self, request: SearchRequest) -> Result { + trace!("Sending request to Serper Search API: {request:?}"); + + let response = self + .client + .request(Method::POST, BASE_URL) + .header("X-API-KEY", &self.api_key) + .header("Content-Type", "application/json") + .json(&request) + .send() + .map_err(|err| from_reqwest_error("Request failed", err))?; + + parse_response(response) + } + + pub fn api_key(&self) -> &String { + &self.api_key + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchRequest { + pub q: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub gl: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub hl: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub num: Option, + #[serde(skip_serializing_if = 
"Option::is_none")] + pub page: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResponse { + pub organic: Vec, + #[serde(rename = "searchParameters")] + pub search_parameters: SearchParameters, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + pub title: String, + pub link: String, + pub snippet: String, + pub position: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchParameters { + pub q: String, + #[serde(rename = "type")] + pub search_type: String, + pub engine: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ErrorResponse { + pub message: String, + pub error: Option, +} + +fn parse_response(response: Response) -> Result { + let status = response.status(); + if status.is_success() { + let body = response + .json::() + .map_err(|err| from_reqwest_error("Failed to decode response body", err))?; + + trace!("Received response from Serper Search API: {body:?}"); + Ok(body) + } else { + // Try to parse error response + match response.json::() { + Ok(error_body) => { + trace!("Received {status} response from Serper Search API: {error_body:?}"); + + let search_error = match status.as_u16() { + 400 => SearchError::InvalidQuery, + 401 => SearchError::BackendError("Invalid API key".to_string()), + 403 => SearchError::BackendError("API access forbidden".to_string()), + 429 => SearchError::RateLimited(60), // Default to 60 seconds + _ => SearchError::BackendError(format!( + "Request failed with {}: {}", + status, error_body.message + )), + }; + + Err(search_error) + } + Err(_) => { + // Fallback for non-JSON error responses + Err(SearchError::BackendError(format!( + "Request failed with status {status}" + ))) + } + } + } +} diff --git a/websearch/serper/src/conversions.rs b/websearch/serper/src/conversions.rs new file mode 100644 index 000000000..02f6207b6 --- /dev/null +++ b/websearch/serper/src/conversions.rs @@ -0,0 +1,160 @@ +use 
crate::client::{SearchRequest, SearchResponse, SearchResult as SerperSearchResult}; +use golem_web_search::golem::web_search::web_search::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; + +pub fn params_to_request(params: SearchParams, page: u32) -> Result { + // Validate query + if params.query.trim().is_empty() { + return Err(SearchError::InvalidQuery); + } + + // Convert region to Google country code + let gl = params + .region + .map(|region| match region.to_lowercase().as_str() { + "us" | "usa" | "united states" => "us".to_string(), + "uk" | "gb" | "united kingdom" => "uk".to_string(), + "in" | "india" => "in".to_string(), + _ => region, + }); + + // Convert language to Google language code + let hl = params + .language + .map(|lang| match lang.to_lowercase().as_str() { + "english" | "en" => "en".to_string(), + "spanish" | "es" => "es".to_string(), + "french" | "fr" => "fr".to_string(), + _ => lang, + }); + + Ok(SearchRequest { + q: params.query.clone(), + gl, + hl, + num: params.max_results, + page: Some(page), // 1-based + }) +} + +pub fn response_to_results( + response: SearchResponse, + original_params: &SearchParams, + current_page: u32, +) -> (Vec, SearchMetadata) { + let mut results = Vec::new(); + + // Process organic search results + for (index, item) in response.organic.iter().enumerate() { + results.push(serper_result_to_search_result(item, index)); + } + + let metadata = create_search_metadata(&response, original_params, current_page); + (results, metadata) +} + +fn serper_result_to_search_result(item: &SerperSearchResult, index: usize) -> SearchResult { + // Calculate score based on position + let score = 1.0 - (index as f32) * 0.01; + + SearchResult { + title: item.title.clone(), + url: item.link.clone(), + snippet: item.snippet.clone(), + display_url: extract_domain(&item.link), + source: extract_domain(&item.link), + score: Some(score as f64), + html_snippet: None, + date_published: None, + images: None, + content_chunks: 
Some(vec![item.snippet.clone()]), + } +} + +fn extract_domain(url: &str) -> Option { + if let Ok(parsed_url) = url::Url::parse(url) { + parsed_url.host_str().map(|host| host.to_string()) + } else { + None + } +} + +fn create_search_metadata( + response: &SearchResponse, + params: &SearchParams, + current_page: u32, +) -> SearchMetadata { + // Check if we got the full count requested + let has_more_results = { + let requested_count = params.max_results.unwrap_or(10); + response.organic.len() == (requested_count as usize) + }; + + // Create next page token if more results are available + let next_page_token = if has_more_results { + let next_page = current_page + 1; + Some(next_page.to_string()) + } else { + None + }; + + // Estimate total results + let total_results = if (response.organic.len() as u32) >= params.max_results.unwrap_or(10) { + Some(100000u64) // Conservative estimate + } else { + Some(response.organic.len() as u64) + }; + + SearchMetadata { + query: params.query.clone(), + total_results, + search_time_ms: None, + safe_search: params.safe_search, + language: params.language.clone(), + region: params.region.clone(), + next_page_token, + rate_limits: None, + current_page, // 1-based + } +} + +pub fn validate_search_params(params: &SearchParams) -> Result<(), SearchError> { + if params.query.trim().is_empty() { + return Err(SearchError::InvalidQuery); + } + + if let Some(max_results) = params.max_results { + if max_results > 100 { + return Err(SearchError::UnsupportedFeature( + "max_results cannot exceed 100 for Serper Search".to_string(), + )); + } + } + if params.safe_search.is_some() { + return Err(SearchError::UnsupportedFeature( + "safe_search not supported".to_string(), + )); + } + if params.include_html == Some(true) { + return Err(SearchError::UnsupportedFeature( + "include-html not supported".to_string(), + )); + } + if params.time_range.is_some() { + return Err(SearchError::UnsupportedFeature( + "time-range not supported".to_string(), + )); + } 
+ if params.include_images == Some(true) { + return Err(SearchError::UnsupportedFeature( + "include-images not supported".to_string(), + )); + } + if params.advanced_answer == Some(true) { + return Err(SearchError::UnsupportedFeature( + "advanced-answer not supported".to_string(), + )); + } + Ok(()) +} diff --git a/websearch/serper/src/lib.rs b/websearch/serper/src/lib.rs new file mode 100644 index 000000000..46d473f61 --- /dev/null +++ b/websearch/serper/src/lib.rs @@ -0,0 +1,202 @@ +mod client; +mod conversions; + +use std::cell::RefCell; + +use crate::client::{SearchRequest, SerperSearchApi}; +use crate::conversions::{params_to_request, response_to_results, validate_search_params}; +use golem_web_search::durability::Durablewebsearch; +use golem_web_search::durability::ExtendedwebsearchGuest; +use golem_web_search::golem::web_search::web_search::{ + Guest, GuestSearchSession, SearchError, SearchMetadata, SearchParams, SearchResult, + SearchSession, +}; + +use golem_web_search::LOGGING_STATE; + +#[derive(Debug, Clone, PartialEq, golem_rust::FromValueAndType, golem_rust::IntoValue)] +pub struct SerperReplayState { + pub api_key: String, + pub current_page: u32, + pub metadata: SearchMetadata, + pub finished: bool, +} + +struct SerperSearch { + client: SerperSearchApi, + request: SearchRequest, + params: SearchParams, + metadata: SearchMetadata, + current_page: u32, // 1-based +} + +impl SerperSearch { + fn new(client: SerperSearchApi, request: SearchRequest, params: SearchParams) -> Self { + Self { + client, + request, + params: params.clone(), + metadata: SearchMetadata { + query: params.query, + total_results: None, + search_time_ms: None, + safe_search: None, + language: None, + region: None, + next_page_token: None, + rate_limits: None, + current_page: 1, + }, + current_page: 1, // 1-based + } + } + fn next_page(&mut self) -> Result<(Vec, bool), SearchError> { + let request = + crate::conversions::params_to_request(self.params.clone(), self.current_page)?; + 
let response = self.client.search(request)?; + let (results, metadata) = response_to_results(response, &self.params, self.current_page); + + // Determine if more results are available + let num_results = self.request.num.unwrap_or(10); + let finished = results.len() < (num_results as usize); + + // Update metadata for this page + self.metadata = metadata; + self.metadata.current_page = self.current_page; + + if !finished { + self.current_page += 1; + self.metadata.next_page_token = Some(self.current_page.to_string()); + } else { + self.metadata.next_page_token = None; + } + + Ok((results, finished)) + } + fn get_metadata(&self) -> Option { + Some(self.metadata.clone()) + } +} + +// Create a wrapper that implements GuestSearchSession properly +struct SerperSearchSession(RefCell); + +impl SerperSearchSession { + fn new(search: SerperSearch) -> Self { + Self(RefCell::new(search)) + } +} + +impl GuestSearchSession for SerperSearchSession { + fn next_page(&self) -> Result, SearchError> { + let mut search = self.0.borrow_mut(); + let (results, _) = search.next_page()?; + Ok(results) + } + + fn get_metadata(&self) -> Option { + let search = self.0.borrow(); + search.get_metadata() + } +} + +struct SerperSearchComponent; + +impl SerperSearchComponent { + const API_KEY_VAR: &'static str = "SERPER_API_KEY"; + + fn get_api_key() -> Result { + std::env::var(Self::API_KEY_VAR).map_err(|_| { + SearchError::BackendError("SERPER_API_KEY environment variable not set".to_string()) + }) + } + + fn create_client() -> Result { + let api_key = Self::get_api_key()?; + Ok(SerperSearchApi::new(api_key)) + } + + fn execute_search( + params: SearchParams, + ) -> Result<(Vec, SearchMetadata), SearchError> { + validate_search_params(¶ms)?; + + let client = Self::create_client()?; + let request = params_to_request(params.clone(), 1)?; + + let response = client.search(request)?; + let (results, metadata) = response_to_results(response, ¶ms, 1); + + Ok((results, metadata)) + } + + fn 
start_search_session(params: SearchParams) -> Result { + validate_search_params(¶ms)?; + + let client = Self::create_client()?; + let request = params_to_request(params.clone(), 1)?; + + let search = SerperSearch::new(client, request, params); + Ok(SerperSearchSession::new(search)) + } +} + +impl Guest for SerperSearchComponent { + type SearchSession = SerperSearchSession; + + fn start_search(params: SearchParams) -> Result { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + match Self::start_search_session(params) { + Ok(session) => Ok(SearchSession::new(session)), + Err(err) => Err(err), + } + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec, Option), SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + let (results, metadata) = Self::execute_search(params)?; + Ok((results, Some(metadata))) + } +} + +impl ExtendedwebsearchGuest for SerperSearchComponent { + type ReplayState = SerperReplayState; + + fn unwrapped_search_session(params: SearchParams) -> Result { + let client = Self::create_client()?; + let request = crate::conversions::params_to_request(params.clone(), 1)?; + let search = SerperSearch::new(client, request, params); + Ok(SerperSearchSession::new(search)) + } + + fn session_to_state(session: &Self::SearchSession) -> Self::ReplayState { + let mut search = session.0.borrow_mut(); + let (_, finished) = search.next_page().unwrap_or((vec![], true)); + SerperReplayState { + api_key: search.client.api_key().to_string(), + current_page: search.current_page, + metadata: search.metadata.clone(), + finished, + } + } + + fn session_from_state( + state: &Self::ReplayState, + params: SearchParams, + ) -> Result { + let client = SerperSearchApi::new(state.api_key.clone()); + let request = crate::conversions::params_to_request(params.clone(), state.current_page)?; + let mut search = SerperSearch::new(client, request, params); + search.current_page = state.current_page; + search.metadata = state.metadata.clone(); + if 
state.finished { + let _ = search.next_page(); + } + Ok(SerperSearchSession::new(search)) + } +} + +type DurableSerperComponent = Durablewebsearch; +golem_web_search::export_websearch!(DurableSerperComponent with_types_in golem_web_search); diff --git a/websearch/serper/wit/deps/golem-web-search/golem-web-search.wit b/websearch/serper/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..550ee50f0 --- /dev/null +++ b/websearch/serper/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + 
unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file diff --git a/websearch/serper/wit/deps/wasi:io/error.wit b/websearch/serper/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/websearch/serper/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. 
+ @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/websearch/serper/wit/deps/wasi:io/poll.wit b/websearch/serper/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/websearch/serper/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. 
+ /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/websearch/serper/wit/deps/wasi:io/streams.wit b/websearch/serper/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/websearch/serper/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. 
+ @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. 
+ /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. 
+ /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. 
+ /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. 
calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. + @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/websearch/serper/wit/deps/wasi:io/world.wit b/websearch/serper/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/websearch/serper/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/websearch/serper/wit/serper.wit b/websearch/serper/wit/serper.wit new file mode 100644 index 000000000..1b8a1e207 --- /dev/null +++ b/websearch/serper/wit/serper.wit @@ -0,0 +1,6 @@ +package golem:web-search-serper@1.0.0; + +world websearch-library { + export golem:web-search/web-search@1.0.0; + export golem:web-search/types@1.0.0; +} diff --git a/websearch/tavily/Cargo.toml b/websearch/tavily/Cargo.toml new file mode 100644 index 000000000..f731ab6ed --- /dev/null +++ b/websearch/tavily/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "golem-web-search-tavily" +version = "0.1.0" 
+edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly component for querying Tavily APIs via the golem:web-search interface" + +[lib] +crate-type = ["cdylib"] +path = "src/lib.rs" + +[features] +default = ["durability"] +durability = ["golem-rust/durability", "golem-web-search/durability"] + +[dependencies] +golem-web-search = { path = "../websearch" } +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +wit-bindgen-rt = { workspace = true } +base64 = { workspace = true } +url = "2.5" +urlencoding = "2.1" + +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasm-bindgen = "0.2" + +[profile.release] +opt-level = "s" +lto = true +codegen-units = 1 +panic = "abort" + +[package.metadata.component] +package = "golem:web-search-tavily" + +[package.metadata.component.bindings] +generate_unused_types = true + +[package.metadata.component.bindings.with] +"golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +"golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" + + +[package.metadata.component.target] +path = "wit" + +[package.metadata.component.target.dependencies] +"golem:websearch" = { path = "wit/deps/golem-web-search" } +"wasi:io" = { path = "wit/deps/wasi:io" } diff --git a/websearch/tavily/src/bindings.rs b/websearch/tavily/src/bindings.rs new file mode 100644 index 000000000..63794f7a1 --- /dev/null +++ b/websearch/tavily/src/bindings.rs @@ -0,0 +1,49 @@ +// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT! 
+// Options used: +// * runtime_path: "wit_bindgen_rt" +// * with "golem:web-search/types@1.0.0" = "golem_websearch::golem::websearch::types" +// * with "golem:web-search/web-search@1.0.0" = "golem_websearch::golem::websearch::websearch" +// * generate_unused_types +use golem_websearch::golem::websearch::types as __with_name0; +use golem_websearch::golem::websearch::websearch as __with_name1; +#[cfg(target_arch = "wasm32")] +#[unsafe( + link_section = "component-type:wit-bindgen:0.41.0:golem:web-search-travily@1.0.0:websearch-library:encoded world" +)] +#[doc(hidden)] +#[allow(clippy::octal_escapes)] +pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; 1390] = *b"\ +\0asm\x0d\0\x01\0\0\x19\x16wit-component-encoding\x04\0\x07\xe6\x09\x01A\x02\x01\ +A\x08\x01B\x1c\x01ks\x01r\x02\x03urls\x0bdescription\0\x04\0\x0cimage-result\x03\ +\0\x01\x01ku\x01p\x02\x01k\x04\x01ps\x01k\x06\x01r\x0a\x05titles\x03urls\x07snip\ +pets\x0bdisplay-url\0\x06source\0\x05score\x03\x0chtml-snippet\0\x0edate-publish\ +ed\0\x06images\x05\x0econtent-chunks\x07\x04\0\x0dsearch-result\x03\0\x08\x01m\x03\ +\x03off\x06medium\x04high\x04\0\x11safe-search-level\x03\0\x0a\x01r\x03\x05limit\ +y\x09remainingy\x0freset-timestampw\x04\0\x0frate-limit-info\x03\0\x0c\x01kw\x01\ +k\x0b\x01k\x0d\x01r\x09\x05querys\x0dtotal-results\x0e\x0esearch-time-ms\x03\x0b\ +safe-search\x0f\x08language\0\x06region\0\x0fnext-page-token\0\x0brate-limits\x10\ +\x0ccurrent-pagey\x04\0\x0fsearch-metadata\x03\0\x11\x01m\x04\x03day\x04week\x05\ +month\x04year\x04\0\x0atime-range\x03\0\x13\x01ky\x01k\x14\x01k\x7f\x01r\x0b\x05\ +querys\x0bsafe-search\x0f\x08language\0\x06region\0\x0bmax-results\x15\x0atime-r\ +ange\x16\x0finclude-domains\x07\x0fexclude-domains\x07\x0einclude-images\x17\x0c\ +include-html\x17\x0fadvanced-answer\x17\x04\0\x0dsearch-params\x03\0\x18\x01q\x04\ +\x0dinvalid-query\0\0\x0crate-limited\x01y\0\x13unsupported-feature\x01s\0\x0dba\ +ckend-error\x01s\0\x04\0\x0csearch-error\x03\0\x1a\x04\0\x1cgolem:web-search/typ\ 
+es@1.0.0\x05\0\x02\x03\0\0\x0dsearch-params\x02\x03\0\0\x0dsearch-result\x02\x03\ +\0\0\x0fsearch-metadata\x02\x03\0\0\x0csearch-error\x01B\x19\x02\x03\x02\x01\x01\ +\x04\0\x0dsearch-params\x03\0\0\x02\x03\x02\x01\x02\x04\0\x0dsearch-result\x03\0\ +\x02\x02\x03\x02\x01\x03\x04\0\x0fsearch-metadata\x03\0\x04\x02\x03\x02\x01\x04\x04\ +\0\x0csearch-error\x03\0\x06\x04\0\x0esearch-session\x03\x01\x01h\x08\x01p\x03\x01\ +j\x01\x0a\x01\x07\x01@\x01\x04self\x09\0\x0b\x04\0\x20[method]search-session.nex\ +t-page\x01\x0c\x01k\x05\x01@\x01\x04self\x09\0\x0d\x04\0#[method]search-session.\ +get-metadata\x01\x0e\x01i\x08\x01j\x01\x0f\x01\x07\x01@\x01\x06params\x01\0\x10\x04\ +\0\x0cstart-search\x01\x11\x01o\x02\x0a\x0d\x01j\x01\x12\x01\x07\x01@\x01\x06par\ +ams\x01\0\x13\x04\0\x0bsearch-once\x01\x14\x04\0!golem:web-search/web-search@1.0\ +.0\x05\x05\x04\00golem:web-search-travily/websearch-library@1.0.0\x04\0\x0b\x17\x01\ +\0\x11websearch-library\x03\0\0\0G\x09producers\x01\x0cprocessed-by\x02\x0dwit-c\ +omponent\x070.227.1\x10wit-bindgen-rust\x060.41.0"; +#[inline(never)] +#[doc(hidden)] +pub fn __link_custom_section_describing_imports() { + wit_bindgen_rt::maybe_link_cabi_realloc(); +} diff --git a/websearch/tavily/src/client.rs b/websearch/tavily/src/client.rs new file mode 100644 index 000000000..23992e16e --- /dev/null +++ b/websearch/tavily/src/client.rs @@ -0,0 +1,129 @@ +use golem_web_search::error::from_reqwest_error; +use golem_web_search::golem::web_search::web_search::SearchError; +use log::trace; +use reqwest::Method; +use reqwest::Response; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use std::fmt::Debug; + +const BASE_URL: &str = "https://api.tavily.com/search"; + +/// The Tavily Search API client for web search with deep document indexing. 
+pub struct TavilySearchApi { + client: reqwest::Client, + pub api_key: String, +} + +impl TavilySearchApi { + pub fn new(api_key: String) -> Self { + let client = reqwest::Client::new(); + Self { client, api_key } + } + + pub fn search(&self, mut request: SearchRequest) -> Result { + trace!("Sending request to Tavily Search API: {request:?}"); + request.api_key = self.api_key.clone(); + let response = self + .client + .request(Method::POST, BASE_URL) + .header("Content-Type", "application/json") + .json(&request) + .send() + .map_err(|err| from_reqwest_error("Request failed", err))?; + + parse_response(response) + } + + pub fn api_key(&self) -> &String { + &self.api_key + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchRequest { + pub api_key: String, + pub query: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub search_depth: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_images: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_answer: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_raw_content: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_results: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_domains: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub exclude_domains: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub format: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub days: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResponse { + pub answer: Option, + pub query: String, + pub response_time: f32, + pub images: Option>, + pub results: Vec, + pub follow_up_questions: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + pub title: String, + pub url: String, + pub content: String, + pub raw_content: Option, + pub score: f32, + pub published_date: Option, +} + 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ErrorResponse { + pub error: String, + pub detail: Option, +} + +fn parse_response(response: Response) -> Result { + let status = response.status(); + if status.is_success() { + let body = response + .json::() + .map_err(|err| from_reqwest_error("Failed to decode response body", err))?; + + trace!("Received response from Tavily Search API: {body:?}"); + Ok(body) + } else { + // Try to parse error response + match response.json::() { + Ok(error_body) => { + trace!("Received {status} response from Tavily Search API: {error_body:?}"); + + let search_error = match status.as_u16() { + 400 => SearchError::InvalidQuery, + 401 => SearchError::BackendError("Invalid API key".to_string()), + 403 => SearchError::BackendError("API key quota exceeded".to_string()), + 429 => SearchError::RateLimited(60), // Default to 60 seconds + _ => SearchError::BackendError(format!( + "Request failed with {}: {}", + status, error_body.error + )), + }; + + Err(search_error) + } + Err(_) => { + // Fallback for non-JSON error responses + Err(SearchError::BackendError(format!( + "Request failed with status {status}" + ))) + } + } + } +} diff --git a/websearch/tavily/src/conversions.rs b/websearch/tavily/src/conversions.rs new file mode 100644 index 000000000..0b9d6635b --- /dev/null +++ b/websearch/tavily/src/conversions.rs @@ -0,0 +1,222 @@ +use crate::client::{SearchRequest, SearchResponse, SearchResult as TavilySearchResult}; +use golem_web_search::golem::web_search::types::{ImageResult, TimeRange}; +use golem_web_search::golem::web_search::web_search::{ + SearchError, SearchMetadata, SearchParams, SearchResult, +}; + +pub fn params_to_request( + params: SearchParams, + api_key: String, + _page: u32, +) -> Result { + // Validate query + if params.query.trim().is_empty() { + return Err(SearchError::InvalidQuery); + } + + // Determine search depth based on parameters + let search_depth = determine_search_depth(¶ms); + + // Convert time 
range to days + let days = params.time_range.map(|range| match range { + TimeRange::Day => 1, + TimeRange::Week => 7, + TimeRange::Month => 30, + TimeRange::Year => 365, + }); + + // Handle domain filtering + let query = params.query.clone(); + + // For exclude_domains, we'll add them to the exclude_domains parameter + // rather than modifying the query directly + let exclude_domains = params.exclude_domains.clone(); + let include_domains = params.include_domains.clone(); + + // Note: Tavily's SearchRequest doesn't have pagination fields (page/start/offset) + // This is a limitation of the current API structure + Ok(SearchRequest { + api_key, + query, + search_depth: Some(search_depth), + include_images: params.include_images, + include_answer: Some(true), // Always include answer for better results + include_raw_content: Some(true), // Include raw content for better content chunks + max_results: params.max_results, + include_domains, + exclude_domains, + format: Some("json".to_string()), + days, + }) +} + +fn determine_search_depth(params: &SearchParams) -> String { + // Use "advanced" search depth if we need comprehensive results + // Use "basic" for faster, simpler searches + if params.max_results.unwrap_or(10) > 10 || params.include_images == Some(true) { + "advanced".to_string() + } else { + "basic".to_string() + } +} + +pub fn response_to_results( + response: SearchResponse, + original_params: &SearchParams, + current_page: u32, +) -> (Vec, SearchMetadata) { + let mut results = Vec::new(); + + // Process main search results + for (index, item) in response.results.iter().enumerate() { + results.push(tavily_result_to_search_result( + item, + index, + original_params.include_images.unwrap_or(false), + &response.images, + )); + } + + // If we have an answer, create a special result for it + if let Some(answer) = &response.answer { + let answer_result = SearchResult { + title: "AI-Generated Answer".to_string(), + url: "https://tavily.com".to_string(), // 
Placeholder URL + snippet: answer.clone(), + display_url: Some("tavily.com".to_string()), + source: Some("Tavily AI".to_string()), + score: Some(1.0), // Highest score for AI answer + html_snippet: None, + date_published: None, + images: None, + content_chunks: Some(vec![answer.clone()]), + }; + + // Insert at the beginning + results.insert(0, answer_result); + } + + let metadata = create_search_metadata(&response, original_params, current_page); + (results, metadata) +} + +fn tavily_result_to_search_result( + item: &TavilySearchResult, + index: usize, + include_images: bool, + response_images: &Option>, +) -> SearchResult { + let mut images = None; + let mut content_chunks = None; + + // Extract images if requested and available + if include_images { + if let Some(img_urls) = response_images { + if !img_urls.is_empty() { + images = Some( + img_urls + .iter() + .map(|url| ImageResult { + url: url.clone(), + description: Some(format!("Image related to: {}", item.title)), + }) + .collect(), + ); + } + } + } + + // Create content chunks from both content and raw_content + let mut chunks = Vec::new(); + + // Add main content + if !item.content.is_empty() { + chunks.push(item.content.clone()); + } + + // Add raw content if available and different from main content + if let Some(raw_content) = &item.raw_content { + if !raw_content.is_empty() && raw_content != &item.content { + chunks.push(raw_content.clone()); + } + } + + if !chunks.is_empty() { + content_chunks = Some(chunks); + } + + // Use Tavily's score directly, but adjust for position bias + let adjusted_score = item.score * (1.0 - (index as f32) * 0.01); + + SearchResult { + title: item.title.clone(), + url: item.url.clone(), + snippet: item.content.clone(), + display_url: extract_domain(&item.url), + source: extract_domain(&item.url), + score: Some(adjusted_score as f64), + html_snippet: None, + date_published: item.published_date.clone(), + images, + content_chunks, + } +} + +fn extract_domain(url: &str) -> 
Option { + if let Ok(parsed_url) = url::Url::parse(url) { + parsed_url.host_str().map(|host| host.to_string()) + } else { + None + } +} + +fn create_search_metadata( + response: &SearchResponse, + params: &SearchParams, + current_page: u32, +) -> SearchMetadata { + let total_results = Some(response.results.len() as u64); + let next_page_token = if (response.results.len() as u32) + > (current_page + 1) * params.max_results.unwrap_or(10) + { + Some((current_page + 1).to_string()) + } else { + None + }; + + SearchMetadata { + query: params.query.clone(), + total_results, + search_time_ms: Some(response.response_time as f64), + safe_search: params.safe_search, + language: params.language.clone(), + region: params.region.clone(), + next_page_token, + rate_limits: None, + current_page, + } +} + +pub fn validate_search_params(params: &SearchParams) -> Result<(), SearchError> { + if params.query.trim().is_empty() { + return Err(SearchError::InvalidQuery); + } + if let Some(max_results) = params.max_results { + if max_results > 500 { + return Err(SearchError::UnsupportedFeature( + "max_results cannot exceed 500 for Tavily Search".to_string(), + )); + } + } + if params.safe_search.is_some() { + return Err(SearchError::UnsupportedFeature( + "safe_search not supported".to_string(), + )); + } + if params.include_html == Some(true) { + return Err(SearchError::UnsupportedFeature( + "include-html not supported".to_string(), + )); + } + Ok(()) +} diff --git a/websearch/tavily/src/lib.rs b/websearch/tavily/src/lib.rs new file mode 100644 index 000000000..ac9c6aa7b --- /dev/null +++ b/websearch/tavily/src/lib.rs @@ -0,0 +1,212 @@ +mod client; +mod conversions; + +use std::cell::RefCell; + +use crate::client::{SearchRequest, TavilySearchApi}; +use crate::conversions::{params_to_request, response_to_results, validate_search_params}; +use golem_web_search::durability::Durablewebsearch; +use golem_web_search::durability::ExtendedwebsearchGuest; +use 
golem_web_search::golem::web_search::web_search::{ + Guest, GuestSearchSession, SearchError, SearchMetadata, SearchParams, SearchResult, + SearchSession, +}; + +use golem_web_search::LOGGING_STATE; + +#[derive(Debug, Clone, PartialEq, golem_rust::FromValueAndType, golem_rust::IntoValue)] +pub struct TavilyReplayState { + pub api_key: String, + pub metadata: Option, + pub finished: bool, + pub all_results: Vec, + pub current_page: u32, +} + +struct TavilySearch { + client: TavilySearchApi, + params: SearchParams, + all_results: Vec, + page_size: u32, + current_page: u32, + metadata: Option, +} + +impl TavilySearch { + fn new(client: TavilySearchApi, _request: SearchRequest, params: SearchParams) -> Self { + let page_size = params.max_results.unwrap_or(10); + Self { + client, + params, + all_results: Vec::new(), + page_size, + current_page: 0, + metadata: None, + } + } + + fn fetch_all_results(&mut self) -> Result<(), SearchError> { + let api_key = std::env::var("TAVILY_API_KEY").unwrap_or_default(); + let request = crate::conversions::params_to_request(self.params.clone(), api_key, 0)?; + let response = self.client.search(request)?; + let (results, metadata) = response_to_results(response, &self.params, 0); + self.all_results = results; + self.metadata = Some(metadata); + Ok(()) + } + + fn next_page(&mut self) -> Result<(Vec, bool), SearchError> { + if self.all_results.is_empty() { + self.fetch_all_results()?; + } + let start = (self.current_page * self.page_size) as usize; + let end = (((self.current_page + 1) * self.page_size) as usize).min(self.all_results.len()); + let page_results = if start < self.all_results.len() { + self.all_results[start..end].to_vec() + } else { + Vec::new() + }; + // Update metadata for this page + if let Some(metadata) = &mut self.metadata { + metadata.current_page = self.current_page; + metadata.next_page_token = if end < self.all_results.len() { + Some((self.current_page + 1).to_string()) + } else { + None + }; + } + + 
self.current_page += 1; + let finished = end >= self.all_results.len(); + Ok((page_results, finished)) + } + fn get_metadata(&self) -> Option { + self.metadata.clone() + } +} + +// Create a wrapper that implements GuestSearchSession properly +struct TavilySearchSession(RefCell); + +impl TavilySearchSession { + fn new(search: TavilySearch) -> Self { + Self(RefCell::new(search)) + } +} + +impl GuestSearchSession for TavilySearchSession { + fn next_page(&self) -> Result, SearchError> { + let mut search = self.0.borrow_mut(); + let (results, _) = search.next_page()?; + Ok(results) + } + fn get_metadata(&self) -> Option { + let search = self.0.borrow(); + search.get_metadata() + } +} + +struct TavilySearchComponent; + +impl TavilySearchComponent { + const API_KEY_VAR: &'static str = "TAVILY_API_KEY"; + + fn create_client() -> Result { + let api_key = Self::get_api_key()?; + Ok(TavilySearchApi::new(api_key)) + } + + fn get_api_key() -> Result { + std::env::var(Self::API_KEY_VAR).map_err(|_| { + SearchError::BackendError("TAVILY_API_KEY environment variable not set".to_string()) + }) + } + + fn execute_search( + params: SearchParams, + ) -> Result<(Vec, SearchMetadata), SearchError> { + validate_search_params(¶ms)?; + + let client = Self::create_client()?; + let api_key = Self::get_api_key()?; + let request = params_to_request(params.clone(), api_key, 1)?; + + let response = client.search(request)?; + let (results, metadata) = response_to_results(response, ¶ms, 1); + + // Unwrap the metadata Option since we know it should be Some + Ok((results, metadata)) + } + + fn start_search_session(params: SearchParams) -> Result { + validate_search_params(¶ms)?; + + let client = Self::create_client()?; + let api_key = Self::get_api_key()?; + let request = params_to_request(params.clone(), api_key, 1)?; + + let search = TavilySearch::new(client, request, params); + Ok(TavilySearchSession::new(search)) + } +} + +impl Guest for TavilySearchComponent { + type SearchSession = 
TavilySearchSession; + + fn start_search(params: SearchParams) -> Result { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + match Self::start_search_session(params) { + Ok(session) => Ok(SearchSession::new(session)), + Err(err) => Err(err), + } + } + + fn search_once( + params: SearchParams, + ) -> Result<(Vec, Option), SearchError> { + LOGGING_STATE.with_borrow_mut(|state| state.init()); + let (results, metadata) = Self::execute_search(params)?; + Ok((results, Some(metadata))) + } +} + +impl ExtendedwebsearchGuest for TavilySearchComponent { + type ReplayState = TavilyReplayState; + fn unwrapped_search_session(params: SearchParams) -> Result { + let client = Self::create_client()?; + let api_key = Self::get_api_key()?; + let request = crate::conversions::params_to_request(params.clone(), api_key, 0)?; + let search = TavilySearch::new(client, request, params); + Ok(TavilySearchSession::new(search)) + } + fn session_to_state(session: &Self::SearchSession) -> Self::ReplayState { + let mut search = session.0.borrow_mut(); + let (_, finished) = search.next_page().unwrap_or((vec![], true)); + TavilyReplayState { + api_key: search.client.api_key().to_string(), + metadata: search.metadata.clone(), + finished, + all_results: search.all_results.clone(), + current_page: search.current_page, + } + } + fn session_from_state( + state: &Self::ReplayState, + params: SearchParams, + ) -> Result { + let client = TavilySearchApi::new(state.api_key.clone()); + let request = + crate::conversions::params_to_request(params.clone(), state.api_key.clone(), 0)?; + let mut search = TavilySearch::new(client, request, params); + search.metadata = state.metadata.clone(); + search.all_results = state.all_results.clone(); + search.current_page = state.current_page; + if state.finished { + let _ = search.next_page(); + } + Ok(TavilySearchSession::new(search)) + } +} + +type DurableTavilyComponent = Durablewebsearch; +golem_web_search::export_websearch!(DurableTavilyComponent with_types_in 
golem_web_search); diff --git a/websearch/tavily/wit/deps/golem-web-search/golem-web-search.wit b/websearch/tavily/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..550ee50f0 --- /dev/null +++ b/websearch/tavily/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or 
streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file diff --git a/websearch/tavily/wit/deps/wasi:io/error.wit b/websearch/tavily/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/websearch/tavily/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! 
+ /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/websearch/tavily/wit/deps/wasi:io/poll.wit b/websearch/tavily/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/websearch/tavily/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. 
If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/websearch/tavily/wit/deps/wasi:io/streams.wit b/websearch/tavily/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/websearch/tavily/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. 
+ /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. 
+ @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. 
+ @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. 
+ /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. the output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. 
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow, + /// The number of bytes to splice + len: u64, + ) -> result; + } +} diff --git a/websearch/tavily/wit/deps/wasi:io/world.wit b/websearch/tavily/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/websearch/tavily/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/websearch/tavily/wit/tavily.wit b/websearch/tavily/wit/tavily.wit new file mode 100644 index 000000000..2c696836f --- /dev/null +++ b/websearch/tavily/wit/tavily.wit @@ -0,0 +1,6 @@ +package golem:web-search-travily@1.0.0; + +world websearch-library { + export golem:web-search/web-search@1.0.0; + export golem:web-search/types@1.0.0; +} diff --git a/websearch/websearch/Cargo.toml b/websearch/websearch/Cargo.toml new file mode 100644 index 000000000..3c68d198e --- /dev/null +++ b/websearch/websearch/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "golem-web-search" +version = "0.0.0" +edition = "2021" +license = "Apache-2.0" +homepage = "https://golem.cloud" +repository = "https://github.com/golemcloud/golem-llm" +description = "WebAssembly components for working with websearch APIs, with special support for Golem Cloud" + +[lib] +path = "src/lib.rs" +crate-type = ["rlib"] + +[features] +default = ["durability"] +durability = ["golem-rust/durability"] + +[dependencies] +golem-rust = { workspace = true } +log = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +wasi-logger = "0.1.2" +wit-bindgen = { version = "0.40.0" } + diff --git a/websearch/websearch/src/config.rs b/websearch/websearch/src/config.rs new file mode 100644 index 000000000..ec118da27 --- /dev/null +++ b/websearch/websearch/src/config.rs @@ -0,0 +1,27 @@ 
+use std::ffi::OsStr;
+
+#[derive(Debug, Clone)]
+pub enum SearchError {
+    ConfigurationError(String),
+    InvalidQuery,
+    BackendError(String),
+    RateLimited(u32),
+    UnsupportedFeature(String),
+}
+
+/// Gets an expected configuration value from the environment, and fails if it is not found
+/// using the `fail` function. Otherwise, it runs `succeed` with the configuration value.
+pub fn with_search_config<R>(
+    key: impl AsRef<OsStr>,
+    fail: impl FnOnce(SearchError) -> R,
+    succeed: impl FnOnce(String) -> R,
+) -> R {
+    let key_str = key.as_ref().to_string_lossy().to_string();
+    match std::env::var(&key) {
+        Ok(value) => succeed(value),
+        Err(_) => {
+            let error = SearchError::ConfigurationError(format!("Missing config key: {key_str}"));
+            fail(error)
+        }
+    }
+}
diff --git a/websearch/websearch/src/durability.rs b/websearch/websearch/src/durability.rs
new file mode 100644
index 000000000..52b30d82a
--- /dev/null
+++ b/websearch/websearch/src/durability.rs
@@ -0,0 +1,494 @@
+use crate::exports::golem::web_search::web_search::Guest;
+use crate::exports::golem::web_search::web_search::{SearchError, SearchParams};
+use golem_rust::value_and_type::{FromValueAndType, IntoValue as IntoValueTrait};
+use std::marker::PhantomData;
+
+/// Wraps a websearch implementation with custom durability
+pub struct Durablewebsearch<Impl> {
+    phantom: PhantomData<Impl>,
+}
+
+/// Trait to be implemented in addition to the websearch `Guest` trait when wrapping it with `Durablewebsearch`.
+pub trait ExtendedwebsearchGuest: Guest + 'static {
+    type ReplayState: std::fmt::Debug + Clone + IntoValueTrait + FromValueAndType;
+
+    /// Creates an instance of the websearch specific `SearchSession` without wrapping it in a `Resource`
+    fn unwrapped_search_session(params: SearchParams) -> Result<Self::SearchSession, SearchError>;
+
+    /// Used at the end of replay to go from replay to live mode
+    fn session_to_state(session: &Self::SearchSession) -> Self::ReplayState;
+    fn session_from_state(
+        state: &Self::ReplayState,
+        params: SearchParams,
+    ) -> Result<Self::SearchSession, SearchError>;
+}
+
+/// When the durability feature flag is off, wrapping with `Durablewebsearch` is just a passthrough
+#[cfg(not(feature = "durability"))]
+mod passthrough_impl {
+    use crate::durability::{Durablewebsearch, ExtendedwebsearchGuest};
+    use crate::golem::web_search::web_search::{Guest, SearchSession};
+    use crate::golem::web_search::web_search::{
+        SearchError, SearchMetadata, SearchParams, SearchResult,
+    };
+
+    impl<Impl: ExtendedwebsearchGuest> Guest for Durablewebsearch<Impl> {
+        type SearchSession = Impl::SearchSession;
+
+        fn start_search(params: SearchParams) -> Result<SearchSession, SearchError> {
+            Impl::start_search(params)
+        }
+
+        fn search_once(
+            params: SearchParams,
+        ) -> Result<(Vec<SearchResult>, Option<SearchMetadata>), SearchError> {
+            Impl::search_once(params)
+        }
+    }
+}
+
+/// When the durability feature flag is on, wrapping with `Durablewebsearch` adds custom durability
+/// on top of the provider-specific websearch implementation using Golem's special host functions and
+/// the `golem-rust` helper library.
+///
+/// There will be custom durability entries saved in the oplog, with the full websearch request and configuration
+/// stored as input, and the full response stored as output. To serialize these in a way it is
+/// observable by oplog consumers, each relevant data type has to be converted to/from `ValueAndType`
+/// which is implemented using the type classes and builder in the `golem-rust` library.
+#[cfg(feature = "durability")]
+mod durable_impl {
+    use crate::durability::{Durablewebsearch, ExtendedwebsearchGuest};
+    use crate::exports::golem::web_search::web_search::{Guest, GuestSearchSession, SearchSession};
+    use crate::exports::golem::web_search::web_search::{
+        SearchError, SearchMetadata, SearchParams, SearchResult,
+    };
+    use golem_rust::bindings::golem::durability::durability::DurableFunctionType;
+    use golem_rust::durability::Durability;
+    use golem_rust::{with_persistence_level, PersistenceLevel};
+    use std::cell::RefCell;
+
+    #[derive(Debug, golem_rust::IntoValue)]
+    struct NoInput;
+
+    // Add the From implementation for SearchError to satisfy the Durability trait bounds
+    impl From<&SearchError> for SearchError {
+        fn from(error: &SearchError) -> Self {
+            error.clone()
+        }
+    }
+
+    impl<Impl: ExtendedwebsearchGuest> Guest for Durablewebsearch<Impl> {
+        type SearchSession = DurableSearchSession<Impl>;
+
+        fn start_search(params: SearchParams) -> Result<SearchSession, SearchError> {
+            let durability = Durability::<Impl::ReplayState, SearchError>::new(
+                "golem_websearch",
+                "start_search",
+                DurableFunctionType::WriteRemote,
+            );
+
+            if durability.is_live() {
+                let result = with_persistence_level(PersistenceLevel::PersistNothing, || {
+                    Impl::unwrapped_search_session(params.clone())
+                });
+
+                match result {
+                    Ok(session) => {
+                        let replay_state = Impl::session_to_state(&session);
+                        let _ = durability.persist(params.clone(), Ok(replay_state));
+                        Ok(SearchSession::new(DurableSearchSession::<Impl>::live(
+                            session, params,
+                        )))
+                    }
+                    Err(error) => {
+                        let _ = durability.persist(params.clone(), Err(error.clone()));
+                        Err(error)
+                    }
+                }
+            } else {
+                let replay_state = durability.replay::<Impl::ReplayState, SearchError>()?;
+                let session = DurableSearchSession::<Impl>::replay(replay_state, params)?;
+                Ok(SearchSession::new(session))
+            }
+        }
+
+        fn search_once(
+            params: SearchParams,
+        ) -> Result<(Vec<SearchResult>, Option<SearchMetadata>), SearchError> {
+            let durability =
+                Durability::<(Vec<SearchResult>, Option<SearchMetadata>), SearchError>::new(
+                    "golem_websearch",
+                    "search_once",
+                    DurableFunctionType::WriteRemote,
+                );
+
+            if durability.is_live() {
+                let result = with_persistence_level(PersistenceLevel::PersistNothing, || {
+                    Impl::search_once(params.clone())
+                });
+
+                match result {
+                    Ok((results, metadata)) => {
+                        durability
+                            .persist(params.clone(), Ok((results.clone(), metadata.clone())))?;
+                        Ok((results, metadata))
+                    }
+                    Err(error) => {
+                        let _ = durability
+                            .persist::<_, (Vec<SearchResult>, Option<SearchMetadata>), SearchError>(
+                                params.clone(),
+                                Err(error.clone()),
+                            );
+                        Err(error)
+                    }
+                }
+            } else {
+                let result = durability
+                    .replay::<(Vec<SearchResult>, Option<SearchMetadata>), SearchError>()?;
+                Ok(result)
+            }
+        }
+    }
+
+    /// Represents the durable search session's state
+    ///
+    /// In live mode it directly calls the underlying websearch session which is implemented on
+    /// top of HTTP requests to search providers.
+    ///
+    /// In replay mode it uses the replay state to reconstruct the session state accurately,
+    /// tracking accumulated results and metadata.
+    ///
+    /// When reaching the end of the replay mode, if the replayed session was not finished yet,
+    /// the retry parameters implemented in `ExtendedwebsearchGuest` are used to create a new websearch session
+    /// and continue the search seamlessly.
+    enum DurableSearchSessionState<Impl: ExtendedwebsearchGuest> {
+        Live { session: Impl::SearchSession },
+        Replay { replay_state: Impl::ReplayState },
+    }
+
+    pub struct DurableSearchSession<Impl: ExtendedwebsearchGuest> {
+        state: RefCell<Option<DurableSearchSessionState<Impl>>>,
+        params: SearchParams,
+    }
+
+    impl<Impl: ExtendedwebsearchGuest> DurableSearchSession<Impl> {
+        fn live(session: Impl::SearchSession, params: SearchParams) -> Self {
+            Self {
+                state: RefCell::new(Some(DurableSearchSessionState::Live { session })),
+                params,
+            }
+        }
+
+        fn replay(
+            replay_state: Impl::ReplayState,
+            params: SearchParams,
+        ) -> Result<Self, SearchError> {
+            Ok(Self {
+                state: RefCell::new(Some(DurableSearchSessionState::Replay { replay_state })),
+                params,
+            })
+        }
+    }
+
+    impl<Impl: ExtendedwebsearchGuest> Drop for DurableSearchSession<Impl> {
+        fn drop(&mut self) {
+            match self.state.take() {
+                Some(DurableSearchSessionState::Live { session }) => {
+                    with_persistence_level(PersistenceLevel::PersistNothing, move || {
+                        drop(session);
+                    });
+                }
+                Some(DurableSearchSessionState::Replay { .. }) => {
+                    // Nothing special to clean up for replay state
+                }
+                None => {}
+            }
+        }
+    }
+
+    impl<Impl: ExtendedwebsearchGuest> GuestSearchSession for DurableSearchSession<Impl> {
+        fn next_page(&self) -> Result<Vec<SearchResult>, SearchError> {
+            let durability = Durability::<(Vec<SearchResult>, Impl::ReplayState), SearchError>::new(
+                "golem_websearch",
+                "next_page",
+                DurableFunctionType::ReadRemote,
+            );
+
+            if durability.is_live() {
+                let mut state = self.state.borrow_mut();
+                match &mut *state {
+                    Some(DurableSearchSessionState::Live { session }) => {
+                        let result =
+                            with_persistence_level(PersistenceLevel::PersistNothing, || {
+                                session.next_page()
+                            });
+
+                        match result {
+                            Ok(value) => {
+                                let replay_state = Impl::session_to_state(session);
+                                let persisted_result = durability
+                                    .persist(NoInput, Ok((value.clone(), replay_state)))?;
+                                Ok(persisted_result.0)
+                            }
+                            Err(error) => {
+                                let _ = durability.persist::<
+                                    _,
+                                    (Vec<SearchResult>, Impl::ReplayState),
+                                    SearchError
+                                >(NoInput, Err(error.clone()));
+                                Err(error)
+                            }
+                        }
+                    }
+                    Some(DurableSearchSessionState::Replay { replay_state }) => {
+                        let session = Impl::session_from_state(replay_state, self.params.clone())?;
+                        let result =
+                            with_persistence_level(PersistenceLevel::PersistNothing, || {
+                                session.next_page()
+                            });
+
+                        match result {
+                            Ok(value) => {
+                                let new_replay_state = Impl::session_to_state(&session);
+                                let persisted_result = durability
+                                    .persist(NoInput, Ok((value.clone(), new_replay_state)))?;
+                                *state = Some(DurableSearchSessionState::Live { session });
+                                Ok(persisted_result.0)
+                            }
+                            Err(error) => {
+                                let _ = durability.persist::<
+                                    _,
+                                    (Vec<SearchResult>, Impl::ReplayState),
+                                    SearchError
+                                >(NoInput, Err(error.clone()));
+                                Err(error)
+                            }
+                        }
+                    }
+                    None => unreachable!(),
+                }
+            } else {
+                let (result, next_replay_state) =
+                    durability.replay::<(Vec<SearchResult>, Impl::ReplayState), SearchError>()?;
+                let mut state = self.state.borrow_mut();
+
+                match &mut *state {
+                    Some(DurableSearchSessionState::Live { .. }) => {
+                        unreachable!("Durable search session cannot be in live mode during replay");
+                    }
+                    Some(DurableSearchSessionState::Replay { replay_state: _ }) => {
+                        *state = Some(DurableSearchSessionState::Replay {
+                            replay_state: next_replay_state.clone(),
+                        });
+                        Ok(result)
+                    }
+                    None => {
+                        unreachable!();
+                    }
+                }
+            }
+        }
+
+        fn get_metadata(&self) -> Option<SearchMetadata> {
+            let state = self.state.borrow();
+            match &*state {
+                Some(DurableSearchSessionState::Live { session }) => {
+                    with_persistence_level(PersistenceLevel::PersistNothing, || {
+                        session.get_metadata()
+                    })
+                }
+                Some(DurableSearchSessionState::Replay { replay_state }) => {
+                    let session =
+                        Impl::session_from_state(replay_state, self.params.clone()).ok()?;
+                    session.get_metadata()
+                }
+                None => {
+                    unreachable!()
+                }
+            }
+        }
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use crate::golem::web_search::types::{
+            ImageResult, RateLimitInfo, SafeSearchLevel, TimeRange,
+        };
+        use crate::golem::web_search::web_search::{
+            SearchError, SearchMetadata, SearchParams, SearchResult,
+        };
+        use golem_rust::value_and_type::{FromValueAndType, IntoValueAndType};
+        use golem_rust::wasm_rpc::WitTypeNode;
+        use std::fmt::Debug;
+
+        fn roundtrip_test<T: Debug + Clone + PartialEq + IntoValueAndType + FromValueAndType>(
+            value: T,
+        ) {
+            let vnt = value.clone().into_value_and_type();
+            let extracted = T::from_value_and_type(vnt).unwrap();
+            assert_eq!(value, extracted);
+        }
+
+        #[test]
+        fn safe_search_level_roundtrip() {
+            roundtrip_test(SafeSearchLevel::Off);
+            roundtrip_test(SafeSearchLevel::Medium);
+            roundtrip_test(SafeSearchLevel::High);
+        }
+
+        #[test]
+        fn time_range_roundtrip() {
+            roundtrip_test(TimeRange::Day);
+            roundtrip_test(TimeRange::Week);
+            roundtrip_test(TimeRange::Month);
+            roundtrip_test(TimeRange::Year);
+        }
+
+        #[test]
+        fn search_error_roundtrip() {
+            roundtrip_test(SearchError::InvalidQuery);
+            roundtrip_test(SearchError::RateLimited(3600));
+            roundtrip_test(SearchError::UnsupportedFeature(
+                "advanced search".to_string(),
+            ));
+            roundtrip_test(SearchError::BackendError("Service unavailable".to_string()));
+        }
+
+        #[test]
+        fn image_result_roundtrip() {
+            roundtrip_test(ImageResult {
+                url: "https://example.com/image.png".to_string(),
+                description: Some("A sample image".to_string()),
+            });
+            roundtrip_test(ImageResult {
+                url: "https://example.com/image2.jpg".to_string(),
+                description: None,
+            });
+        }
+
+        #[test]
+        fn rate_limit_info_roundtrip() {
+            roundtrip_test(RateLimitInfo {
+                limit: 1000,
+                remaining: 500,
+                reset_timestamp: 1698761200,
+            });
+        }
+
+        #[test]
+        fn search_result_roundtrip() {
+            roundtrip_test(SearchResult {
+                title: "Sample Search Result".to_string(),
+                url: "https://example.com/page".to_string(),
+                snippet: "This is a sample search result snippet".to_string(),
+                display_url: Some("example.com/page".to_string()),
+                source: Some("Example Website".to_string()),
+                score: Some(0.95),
+                html_snippet: Some("<p>This is a sample search result snippet</p>".to_string()),
+                date_published: Some("2023-10-01".to_string()),
+                images: Some(vec![ImageResult {
+                    url: "https://example.com/thumb.jpg".to_string(),
+                    description: Some("Thumbnail".to_string()),
+                }]),
+                content_chunks: Some(vec![
+                    "First chunk of content".to_string(),
+                    "Second chunk of content".to_string(),
+                ]),
+            });
+        }
+
+        #[test]
+        fn search_metadata_roundtrip() {
+            roundtrip_test(SearchMetadata {
+                query: "sample search query".to_string(),
+                total_results: Some(1500),
+                search_time_ms: Some(125.5),
+                safe_search: Some(SafeSearchLevel::Medium),
+                language: Some("en".to_string()),
+                region: Some("US".to_string()),
+                next_page_token: Some("next_page_123".to_string()),
+                rate_limits: Some(RateLimitInfo {
+                    limit: 1000,
+                    remaining: 999,
+                    reset_timestamp: 1698761200,
+                }),
+                current_page: 0,
+            });
+        }
+
+        #[test]
+        fn search_params_roundtrip() {
+            roundtrip_test(SearchParams {
+                query: "rust programming language".to_string(),
+                safe_search: Some(SafeSearchLevel::High),
+                language: Some("en".to_string()),
+                region: Some("US".to_string()),
+                max_results: Some(50),
+                time_range: Some(TimeRange::Month),
+                include_domains: Some(vec![
+                    "rust-lang.org".to_string(),
+                    "doc.rust-lang.org".to_string(),
+                ]),
+                exclude_domains: Some(vec!["spam.com".to_string()]),
+                include_images: Some(true),
+                include_html: Some(false),
+                advanced_answer: Some(true),
+            });
+        }
+
+        #[test]
+        fn start_search_input_encoding() {
+            let input = SearchParams {
+                query: "machine learning tutorials".to_string(),
+                safe_search: Some(SafeSearchLevel::Medium),
+                language: Some("en".to_string()),
+                region: Some("US".to_string()),
+                max_results: Some(25),
+                time_range: Some(TimeRange::Week),
+                include_domains: Some(vec![
+                    "github.com".to_string(),
+                    "stackoverflow.com".to_string(),
+                ]),
+                exclude_domains: Some(vec!["ads.com".to_string()]),
+                include_images: Some(true),
+                include_html: Some(true),
+                advanced_answer: Some(false),
+            };
+
+            let encoded = input.into_value_and_type();
+            println!("{encoded:#?}");
+
+            for wit_type in encoded.typ.nodes {
+                if let WitTypeNode::ListType(idx) = wit_type {
+                    assert!(idx >= 0);
+                }
+            }
+        }
+
+        #[test]
+        fn search_once_input_encoding() {
+            let input = SearchParams {
+                query: "web development best practices".to_string(),
+                safe_search: Some(SafeSearchLevel::Off),
+                language: Some("en".to_string()),
+                region: Some("GB".to_string()),
+                max_results: Some(10),
+                time_range: None,
+                include_domains: None,
+                exclude_domains: None,
+                include_images: Some(false),
+                include_html: Some(true),
+                advanced_answer: Some(true),
+            };
+
+            let encoded = input.into_value_and_type();
+            println!("{encoded:#?}");
+
+            for wit_type in encoded.typ.nodes {
+                if let WitTypeNode::ListType(idx) = wit_type {
+                    assert!(idx >= 0);
+                }
+            }
+        }
+    }
+}
diff --git a/websearch/websearch/src/error.rs b/websearch/websearch/src/error.rs
new file mode 100644
index 000000000..69ddddd2d
--- /dev/null
+++ b/websearch/websearch/src/error.rs
@@ -0,0 +1,44 @@
+use crate::golem::web_search::web_search::SearchError;
+use reqwest::StatusCode;
+use std::error::Error;
+
+/// Builds a `SearchError::UnsupportedFeature` whose message is `what` prefixed with "Unsupported: ".
+pub fn unsupported(what: impl AsRef<str>) -> SearchError {
+    SearchError::UnsupportedFeature(format!("Unsupported: {}", what.as_ref()))
+}
+
+/// Wraps a `reqwest::Error` in `SearchError::BackendError`, formatted as "context: error".
+pub fn from_reqwest_error(context: impl AsRef<str>, err: reqwest::Error) -> SearchError {
+    SearchError::BackendError(format!("{}: {}", context.as_ref(), err))
+}
+
+/// Wraps any `Error` value in `SearchError::BackendError`, formatted as "context: error".
+pub fn from_generic_error<T: Error>(context: impl AsRef<str>, err: T) -> SearchError {
+    SearchError::BackendError(format!("{}: {}", context.as_ref(), err))
+}
+
+/// Maps an HTTP status code and optional response body to a `SearchError`.
+///
+/// - 429 becomes `RateLimited`, using the body parsed as a `u32` and falling back to 60
+/// - 401, 403 and 402 become `BackendError("Authentication failed")`
+/// - any other client error becomes `InvalidQuery`
+/// - everything else becomes `BackendError` carrying the status and, if present, the body
+pub fn error_from_status(status: StatusCode, body: Option<String>) -> SearchError {
+    match status {
+        StatusCode::TOO_MANY_REQUESTS => {
+            let retry_after = body.and_then(|b| b.parse::<u32>().ok()).unwrap_or(60);
+            SearchError::RateLimited(retry_after)
+        }
+        StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN | StatusCode::PAYMENT_REQUIRED => {
+            SearchError::BackendError("Authentication failed".to_string())
+        }
+        s if s.is_client_error() => SearchError::InvalidQuery,
+        _ => {
+            let message = match body {
+                Some(b) => format!("HTTP {status}: {b}"),
+                None => format!("HTTP {status}"),
+            };
+            SearchError::BackendError(message)
+        }
+    }
+}
diff --git a/websearch/websearch/src/lib.rs b/websearch/websearch/src/lib.rs
new file mode 100644
index 000000000..c02f647f2
--- /dev/null
+++ b/websearch/websearch/src/lib.rs
@@ -0,0 +1,54 @@
+pub mod config;
+pub mod durability;
+pub mod error;
+pub mod types;
+
+wit_bindgen::generate!({
+    path: "../wit",
+    world: "websearch-library",
+    generate_all,
+    generate_unused_types: true,
+    additional_derives: [
+        PartialEq,
+        golem_rust::FromValueAndType,
+        golem_rust::IntoValue,
+        Clone,
+    ],
+    pub_export_macro: true,
+});
+
+// Export the generated bindings properly
+pub use crate::exports::golem;
+pub use __export_websearch_library_impl as export_websearch;
+
+use std::cell::RefCell;
+use std::str::FromStr;
+
+/// Internal state for configuring WASI log levels during runtime.
+pub struct LoggingState {
+    logging_initialized: bool,
+}
+
+impl LoggingState {
+    /// Initializes WASI logging based on the `GOLEM_WEB_SEARCH_LOG` environment variable.
+    pub fn init(&mut self) {
+        if !self.logging_initialized {
+            let _ = wasi_logger::Logger::install();
+            let max_level = log::LevelFilter::from_str(
+                &std::env::var("GOLEM_WEB_SEARCH_LOG").unwrap_or_default(),
+            )
+            .unwrap_or(log::LevelFilter::Info);
+            log::set_max_level(max_level);
+            self.logging_initialized = true;
+        }
+    }
+}
+
+thread_local! {
+    /// Thread-local holder for logging state, initialized on first access.
+    pub static LOGGING_STATE: RefCell<LoggingState> = const {
+        RefCell::new(LoggingState {
+            logging_initialized: false,
+        })
+    };
+}
diff --git a/websearch/websearch/src/types.rs b/websearch/websearch/src/types.rs
new file mode 100644
index 000000000..510449d97
--- /dev/null
+++ b/websearch/websearch/src/types.rs
@@ -0,0 +1,112 @@
+use serde::{Deserialize, Serialize};
+/// A single search result entry returned in the NDJSON stream.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct SearchResult { + /// Kind of message (should be `"result"`) + pub kind: String, + /// Title of the search result + pub title: String, + /// URL of the result + pub url: String, + /// Text snippet summarizing the result + pub snippet: String, + /// Display URL (if different from `url`) + #[serde(rename = "display-url")] + pub display_url: Option, + /// Source or provider of the result + pub source: Option, + /// Relevance score (if provided) + pub score: Option, + /// HTML-formatted snippet (if available) + #[serde(rename = "html-snippet")] + pub html_snippet: Option, + /// Publication date (if known) + #[serde(rename = "date-published")] + pub date_published: Option, + /// Associated images (if any) + pub images: Option>, + /// Optional semantic content chunks + #[serde(rename = "content-chunks")] + pub content_chunks: Option>, +} + +/// An image associated with a search result. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct ImageResult { + /// Direct image URL + pub url: String, + /// Optional description of the image + pub description: Option, +} + +/// Search metadata, typically emitted at the end of a stream. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct SearchMetadata { + /// Kind of message (should be `"meta"`) + pub kind: String, + /// Original query string + pub query: String, + /// Total number of results found + #[serde(rename = "total-results")] + pub total_results: Option, + /// Time taken to perform the search (in milliseconds) + #[serde(rename = "search-time-ms")] + pub search_time_ms: Option, + /// Safe search level applied + #[serde(rename = "safe-search")] + pub safe_search: Option, + /// Language used for the search + pub language: Option, + /// Region or locale of the search + pub region: Option, + /// Token for fetching the next page + #[serde(rename = "next-page-token")] + pub next_page_token: Option, + /// Rate limit information + #[serde(rename = "rate-limits")] + pub rate_limits: Option, + /// Current page number + #[serde(rename = "current-page")] + pub current_page: u32, +} + +/// Level of safe search filtering. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum SafeSearchLevel { + Off, + Medium, + High, +} + +/// Metadata about the API's rate limits. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct RateLimitInfo { + /// Maximum allowed requests + pub limit: u32, + /// Remaining requests before throttling + pub remaining: u32, + /// Reset time (epoch milliseconds) + #[serde(rename = "reset-timestamp")] + pub reset_timestamp: u64, +} + +/// Marker indicating the end of a stream. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StreamEnd { + /// Kind of message (should be `"done"`) + pub kind: String, +} + +/// A parsed item from the NDJSON search stream. 
+#[derive(Debug, Clone, PartialEq)] +pub enum WebsearchStreamEntry { + /// A search result + Result(SearchResult), + /// Summary metadata + Metadata(SearchMetadata), + /// Stream termination signal + Done, + /// An unrecognized or malformed line + Unknown(String), +} diff --git a/websearch/websearch/wit/deps/golem-web-search/golem-web-search.wit b/websearch/websearch/wit/deps/golem-web-search/golem-web-search.wit new file mode 100644 index 000000000..550ee50f0 --- /dev/null +++ b/websearch/websearch/wit/deps/golem-web-search/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option, + source: option, + score: option, + html-snippet: option, + date-published: option, + images: option>, + content-chunks: option>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option, + search-time-ms: option, + safe-search: option, + language: option, + region: option, + next-page-token: option, + rate-limits: option, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option, + language: option, + region: option, + max-results: option, + time-range: option, + include-domains: option>, + exclude-domains: option>, + include-images: option, + include-html: option, + advanced-answer: option, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + 
invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result, option>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file diff --git a/websearch/websearch/wit/deps/wasi:io/error.wit b/websearch/websearch/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/websearch/websearch/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow` + /// parameter and returns an `option`. 
+ /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details. Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/websearch/websearch/wit/deps/wasi:io/poll.wit b/websearch/websearch/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/websearch/websearch/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list` contains one or more indices of handles in the + /// argument list that is ready for I/O. 
+ /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O. + @since(version = 0.2.0) + poll: func(in: list>) -> list; +} diff --git a/websearch/websearch/wit/deps/wasi:io/streams.wit b/websearch/websearch/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/websearch/websearch/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. 
I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim. When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate as + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. 
Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. 
+ /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result; + + /// Perform a write. This function never blocks. + /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. 
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list<u8> + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed. The output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error.
+ /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. 
calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow<input-stream>, + /// The number of bytes to splice + len: u64, + ) -> result<u64, stream-error>; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`. + @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow<input-stream>, + /// The number of bytes to splice + len: u64, + ) -> result<u64, stream-error>; + } +} diff --git a/websearch/websearch/wit/deps/wasi:io/world.wit b/websearch/websearch/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/websearch/websearch/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/websearch/websearch/wit/websearch.wit b/websearch/websearch/wit/websearch.wit new file mode 100644 index 000000000..8b4c7975c --- /dev/null +++ b/websearch/websearch/wit/websearch.wit @@ -0,0 +1,6 @@ +package golem:web-search-library@1.0.0; + +world websearch-library { + export golem:web-search/web-search@1.0.0; + export golem:web-search/types@1.0.0; +} diff --git a/websearch/wit/deps.lock b/websearch/wit/deps.lock new file mode 100644 index 000000000..adc795b3a --- /dev/null +++ b/websearch/wit/deps.lock @@ -0,0 +1,4 @@ +["wasi:io"] +url =
"https://github.com/WebAssembly/wasi-io/archive/v0.2.3.tar.gz" +sha256 = "1cccbfe4122686ea57a25cd368e8cdfc408cbcad089f47fb6685b6f92e96f050" +sha512 = "7a95f964c13da52611141acd89bc8876226497f128e99dd176a4270c5b5efbd8cc847b5fbd1a91258d028c646db99e0424d72590cf1caf20f9f3a3343fad5017" diff --git a/websearch/wit/deps.toml b/websearch/wit/deps.toml new file mode 100644 index 000000000..15e1ae691 --- /dev/null +++ b/websearch/wit/deps.toml @@ -0,0 +1 @@ +"wasi:io" = "https://github.com/WebAssembly/wasi-io/archive/v0.2.3.tar.gz" diff --git a/websearch/wit/deps/wasi:io/error.wit b/websearch/wit/deps/wasi:io/error.wit new file mode 100644 index 000000000..97c606877 --- /dev/null +++ b/websearch/wit/deps/wasi:io/error.wit @@ -0,0 +1,34 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +interface error { + /// A resource which represents some error information. + /// + /// The only method provided by this resource is `to-debug-string`, + /// which provides some human-readable information about the error. + /// + /// In the `wasi:io` package, this resource is returned through the + /// `wasi:io/streams/stream-error` type. + /// + /// To provide more specific error information, other interfaces may + /// offer functions to "downcast" this error into more specific types. For example, + /// errors returned from streams derived from filesystem types can be described using + /// the filesystem's own error-code type. This is done using the function + /// `wasi:filesystem/types/filesystem-error-code`, which takes a `borrow<error>` + /// parameter and returns an `option<wasi:filesystem/types/error-code>`. + /// + /// The set of functions which can "downcast" an `error` into a more + /// concrete type is open. + @since(version = 0.2.0) + resource error { + /// Returns a string that is suitable to assist humans in debugging + /// this error. + /// + /// WARNING: The returned string should not be consumed mechanically! + /// It may change across platforms, hosts, or other implementation + /// details.
Parsing this string is a major platform-compatibility + /// hazard. + @since(version = 0.2.0) + to-debug-string: func() -> string; + } +} diff --git a/websearch/wit/deps/wasi:io/poll.wit b/websearch/wit/deps/wasi:io/poll.wit new file mode 100644 index 000000000..9bcbe8e03 --- /dev/null +++ b/websearch/wit/deps/wasi:io/poll.wit @@ -0,0 +1,47 @@ +package wasi:io@0.2.3; + +/// A poll API intended to let users wait for I/O events on multiple handles +/// at once. +@since(version = 0.2.0) +interface poll { + /// `pollable` represents a single I/O event which may be ready, or not. + @since(version = 0.2.0) + resource pollable { + + /// Return the readiness of a pollable. This function never blocks. + /// + /// Returns `true` when the pollable is ready, and `false` otherwise. + @since(version = 0.2.0) + ready: func() -> bool; + + /// `block` returns immediately if the pollable is ready, and otherwise + /// blocks until ready. + /// + /// This function is equivalent to calling `poll.poll` on a list + /// containing only this pollable. + @since(version = 0.2.0) + block: func(); + } + + /// Poll for completion on a set of pollables. + /// + /// This function takes a list of pollables, which identify I/O sources of + /// interest, and waits until one or more of the events is ready for I/O. + /// + /// The result `list<u32>` contains one or more indices of handles in the + /// argument list that is ready for I/O. + /// + /// This function traps if either: + /// - the list is empty, or: + /// - the list contains more elements than can be indexed with a `u32` value. + /// + /// A timeout can be implemented by adding a pollable from the + /// wasi-clocks API to the list. + /// + /// This function does not return a `result`; polling in itself does not + /// do any I/O so it doesn't fail. If any of the I/O sources identified by + /// the pollables has an error, it is indicated by marking the source as + /// being ready for I/O.
+ @since(version = 0.2.0) + poll: func(in: list<borrow<pollable>>) -> list<u32>; +} diff --git a/websearch/wit/deps/wasi:io/streams.wit b/websearch/wit/deps/wasi:io/streams.wit new file mode 100644 index 000000000..0de084629 --- /dev/null +++ b/websearch/wit/deps/wasi:io/streams.wit @@ -0,0 +1,290 @@ +package wasi:io@0.2.3; + +/// WASI I/O is an I/O abstraction API which is currently focused on providing +/// stream types. +/// +/// In the future, the component model is expected to add built-in stream types; +/// when it does, they are expected to subsume this API. +@since(version = 0.2.0) +interface streams { + @since(version = 0.2.0) + use error.{error}; + @since(version = 0.2.0) + use poll.{pollable}; + + /// An error for input-stream and output-stream operations. + @since(version = 0.2.0) + variant stream-error { + /// The last operation (a write or flush) failed before completion. + /// + /// More information is available in the `error` payload. + /// + /// After this, the stream will be closed. All future operations return + /// `stream-error::closed`. + last-operation-failed(error), + /// The stream is closed: no more input will be accepted by the + /// stream. A closed output-stream will return this error on all + /// future operations. + closed + } + + /// An input bytestream. + /// + /// `input-stream`s are *non-blocking* to the extent practical on underlying + /// platforms. I/O operations always return promptly; if fewer bytes are + /// promptly available than requested, they return the number of bytes promptly + /// available, which could even be zero. To wait for data to be available, + /// use the `subscribe` function to obtain a `pollable` which can be polled + /// for using `wasi:io/poll`. + @since(version = 0.2.0) + resource input-stream { + /// Perform a non-blocking read from the stream. + /// + /// When the source of a `read` is binary data, the bytes from the source + /// are returned verbatim.
When the source of a `read` is known to the + /// implementation to be text, bytes containing the UTF-8 encoding of the + /// text are returned. + /// + /// This function returns a list of bytes containing the read data, + /// when successful. The returned list will contain up to `len` bytes; + /// it may return fewer than requested, but not more. The list is + /// empty when no bytes are available for reading at this time. The + /// pollable given by `subscribe` will be ready when more bytes are + /// available. + /// + /// This function fails with a `stream-error` when the operation + /// encounters an error, giving `last-operation-failed`, or when the + /// stream is closed, giving `closed`. + /// + /// When the caller gives a `len` of 0, it represents a request to + /// read 0 bytes. If the stream is still open, this call should + /// succeed and return an empty list, or otherwise fail with `closed`. + /// + /// The `len` parameter is a `u64`, which could represent a list of u8 which + /// is not possible to allocate in wasm32, or not desirable to allocate + /// as a return value by the callee. The callee may return a list of bytes + /// less than `len` in size while more bytes are available for reading. + @since(version = 0.2.0) + read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result<list<u8>, stream-error>; + + /// Read bytes from a stream, after blocking until at least one byte can + /// be read. Except for blocking, behavior is identical to `read`. + @since(version = 0.2.0) + blocking-read: func( + /// The maximum number of bytes to read + len: u64 + ) -> result<list<u8>, stream-error>; + + /// Skip bytes from a stream. Returns number of bytes skipped. + /// + /// Behaves identical to `read`, except instead of returning a list + /// of bytes, returns the number of bytes consumed from the stream. + @since(version = 0.2.0) + skip: func( + /// The maximum number of bytes to skip.
+ len: u64, + ) -> result<u64, stream-error>; + + /// Skip bytes from a stream, after blocking until at least one byte + /// can be skipped. Except for blocking behavior, identical to `skip`. + @since(version = 0.2.0) + blocking-skip: func( + /// The maximum number of bytes to skip. + len: u64, + ) -> result<u64, stream-error>; + + /// Create a `pollable` which will resolve once either the specified stream + /// has bytes available to read or the other end of the stream has been + /// closed. + /// The created `pollable` is a child resource of the `input-stream`. + /// Implementations may trap if the `input-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + } + + + /// An output bytestream. + /// + /// `output-stream`s are *non-blocking* to the extent practical on + /// underlying platforms. Except where specified otherwise, I/O operations also + /// always return promptly, after the number of bytes that can be written + /// promptly, which could even be zero. To wait for the stream to be ready to + /// accept data, use the `subscribe` function to obtain a `pollable` which can be + /// polled for using `wasi:io/poll`. + /// + /// Dropping an `output-stream` while there's still an active write in + /// progress may result in the data being lost. Before dropping the stream, + /// be sure to fully flush your writes. + @since(version = 0.2.0) + resource output-stream { + /// Check readiness for writing. This function never blocks. + /// + /// Returns the number of bytes permitted for the next call to `write`, + /// or an error. Calling `write` with more bytes than this function has + /// permitted will trap. + /// + /// When this function returns 0 bytes, the `subscribe` pollable will + /// become ready when this function will report at least 1 byte, or an + /// error. + @since(version = 0.2.0) + check-write: func() -> result<u64, stream-error>; + + /// Perform a write. This function never blocks.
+ /// + /// When the destination of a `write` is binary data, the bytes from + /// `contents` are written verbatim. When the destination of a `write` is + /// known to the implementation to be text, the bytes of `contents` are + /// transcoded from UTF-8 into the encoding of the destination and then + /// written. + /// + /// Precondition: check-write gave permit of Ok(n) and contents has a + /// length of less than or equal to n. Otherwise, this function will trap. + /// + /// returns Err(closed) without writing if the stream has closed since + /// the last call to check-write provided a permit. + @since(version = 0.2.0) + write: func( + contents: list<u8> + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 bytes, and then flush the stream. Block + /// until all of these operations are complete, or an error occurs. + /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write`, and `flush`, and is implemented with the + /// following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while !contents.is_empty() { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, contents.len()); + /// let (chunk, rest) = contents.split_at(len); + /// this.write(chunk ); // eliding error handling + /// contents = rest; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-and-flush: func( + contents: list<u8> + ) -> result<_, stream-error>; + + /// Request to flush buffered output. This function never blocks. + /// + /// This tells the output-stream that the caller intends any buffered + /// output to be flushed.
The output which is expected to be flushed + /// is all that has been passed to `write` prior to this call. + /// + /// Upon calling this function, the `output-stream` will not accept any + /// writes (`check-write` will return `ok(0)`) until the flush has + /// completed. The `subscribe` pollable will become ready when the + /// flush has completed and the stream can accept more writes. + @since(version = 0.2.0) + flush: func() -> result<_, stream-error>; + + /// Request to flush buffered output, and block until flush completes + /// and stream is ready for writing again. + @since(version = 0.2.0) + blocking-flush: func() -> result<_, stream-error>; + + /// Create a `pollable` which will resolve once the output-stream + /// is ready for more writing, or an error has occurred. When this + /// pollable is ready, `check-write` will return `ok(n)` with n>0, or an + /// error. + /// + /// If the stream is closed, this pollable is always ready immediately. + /// + /// The created `pollable` is a child resource of the `output-stream`. + /// Implementations may trap if the `output-stream` is dropped before + /// all derived `pollable`s created with this function are dropped. + @since(version = 0.2.0) + subscribe: func() -> pollable; + + /// Write zeroes to a stream. + /// + /// This should be used precisely like `write` with the exact same + /// preconditions (must use check-write first), but instead of + /// passing a list of bytes, you simply pass the number of zero-bytes + /// that should be written. + @since(version = 0.2.0) + write-zeroes: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Perform a write of up to 4096 zeroes, and then flush the stream. + /// Block until all of these operations are complete, or an error + /// occurs.
+ /// + /// This is a convenience wrapper around the use of `check-write`, + /// `subscribe`, `write-zeroes`, and `flush`, and is implemented with + /// the following pseudo-code: + /// + /// ```text + /// let pollable = this.subscribe(); + /// while num_zeroes != 0 { + /// // Wait for the stream to become writable + /// pollable.block(); + /// let Ok(n) = this.check-write(); // eliding error handling + /// let len = min(n, num_zeroes); + /// this.write-zeroes(len); // eliding error handling + /// num_zeroes -= len; + /// } + /// this.flush(); + /// // Wait for completion of `flush` + /// pollable.block(); + /// // Check for any errors that arose during `flush` + /// let _ = this.check-write(); // eliding error handling + /// ``` + @since(version = 0.2.0) + blocking-write-zeroes-and-flush: func( + /// The number of zero-bytes to write + len: u64 + ) -> result<_, stream-error>; + + /// Read from one stream and write to another. + /// + /// The behavior of splice is equivalent to: + /// 1. calling `check-write` on the `output-stream` + /// 2. calling `read` on the `input-stream` with the smaller of the + /// `check-write` permitted length and the `len` provided to `splice` + /// 3. calling `write` on the `output-stream` with that read data. + /// + /// Any error reported by the call to `check-write`, `read`, or + /// `write` ends the splice and reports that error. + /// + /// This function returns the number of bytes transferred; it may be less + /// than `len`. + @since(version = 0.2.0) + splice: func( + /// The stream to read from + src: borrow<input-stream>, + /// The number of bytes to splice + len: u64, + ) -> result<u64, stream-error>; + + /// Read from one stream and write to another, with blocking. + /// + /// This is similar to `splice`, except that it blocks until the + /// `output-stream` is ready for writing, and the `input-stream` + /// is ready for reading, before performing the `splice`.
+ @since(version = 0.2.0) + blocking-splice: func( + /// The stream to read from + src: borrow<input-stream>, + /// The number of bytes to splice + len: u64, + ) -> result<u64, stream-error>; + } +} diff --git a/websearch/wit/deps/wasi:io/world.wit b/websearch/wit/deps/wasi:io/world.wit new file mode 100644 index 000000000..f1d2102dc --- /dev/null +++ b/websearch/wit/deps/wasi:io/world.wit @@ -0,0 +1,10 @@ +package wasi:io@0.2.3; + +@since(version = 0.2.0) +world imports { + @since(version = 0.2.0) + import streams; + + @since(version = 0.2.0) + import poll; +} diff --git a/websearch/wit/golem-web-search.wit b/websearch/wit/golem-web-search.wit new file mode 100644 index 000000000..550ee50f0 --- /dev/null +++ b/websearch/wit/golem-web-search.wit @@ -0,0 +1,105 @@ +package golem:web-search@1.0.0; + +interface types { + /// Core structure for a single search result + record search-result { + title: string, + url: string, + snippet: string, + display-url: option<string>, + source: option<string>, + score: option<f64>, + html-snippet: option<string>, + date-published: option<string>, + images: option<list<image-result>>, + content-chunks: option<list<string>>, + } + + /// Optional image-related result data + record image-result { + url: string, + description: option<string>, + } + + /// Optional metadata for a search session + record search-metadata { + query: string, + total-results: option<u64>, + search-time-ms: option<f64>, + safe-search: option<safe-search-level>, + language: option<string>, + region: option<string>, + next-page-token: option<string>, + rate-limits: option<rate-limit-info>, + current-page: u32, + } + + /// Safe search settings + enum safe-search-level { + off, + medium, + high, + } + + /// Rate limiting metadata + record rate-limit-info { + limit: u32, + remaining: u32, + reset-timestamp: u64, + } + + /// Query parameters accepted by the unified search API + record search-params { + query: string, + safe-search: option<safe-search-level>, + language: option<string>, + region: option<string>, + max-results: option<u32>, + time-range: option<time-range>, + include-domains: option<list<string>>, + exclude-domains: option<list<string>>, + include-images: option<bool>, + include-html: option<bool>, + advanced-answer:
option<bool>, + } + + /// Supported time range filtering + enum time-range { + day, + week, + month, + year, + } + + /// Structured search error + variant search-error { + invalid-query, + rate-limited(u32), + unsupported-feature(string), + backend-error(string), + } +} + +interface web-search { + use types.{search-params, search-result, search-metadata, search-error}; + + /// Represents an ongoing search session for pagination or streaming + resource search-session { + /// Get the next page of results + next-page: func() -> result<list<search-result>, search-error>; + + /// Retrieve session metadata (after any query) + get-metadata: func() -> option<search-metadata>; + } + + /// Start a search session, returning a search context + start-search: func(params: search-params) -> result<search-session, search-error>; + + /// One-shot search that returns results immediately (limited result count) + search-once: func(params: search-params) -> result<tuple<list<search-result>, option<search-metadata>>, search-error>; +} + +world websearch-library { + export web-search; + export types; +} \ No newline at end of file