From 0772a6527f0f6a21c344b50badc0f554852bba5f Mon Sep 17 00:00:00 2001 From: bdbai Date: Fri, 30 May 2025 14:43:43 +0800 Subject: [PATCH 1/3] add raw url bindings --- curl-sys/Cargo.toml | 1 + curl-sys/lib.rs | 94 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/curl-sys/Cargo.toml b/curl-sys/Cargo.toml index 4d3b91828..12364c1b7 100644 --- a/curl-sys/Cargo.toml +++ b/curl-sys/Cargo.toml @@ -55,3 +55,4 @@ zlib-ng-compat = ["libz-sys/zlib-ng", "static-curl"] upkeep_7_62_0 = [] poll_7_68_0 = [] ntlm = [] +url = [] diff --git a/curl-sys/lib.rs b/curl-sys/lib.rs index f71d99aa5..4ebce60d4 100644 --- a/curl-sys/lib.rs +++ b/curl-sys/lib.rs @@ -1167,6 +1167,100 @@ extern "C" { ) -> CURLMcode; } +#[cfg(feature = "url")] +mod url { + use super::*; + + pub type CURLUCode = __enum_ty; + pub type CURLUPart = __enum_ty; + + pub const CURLUE_OK: CURLUCode = 0; + pub const CURLUE_BAD_HANDLE: CURLUCode = 1; + pub const CURLUE_BAD_PARTPOINTER: CURLUCode = 2; + pub const CURLUE_MALFORMED_INPUT: CURLUCode = 3; + pub const CURLUE_BAD_PORT_NUMBER: CURLUCode = 4; + pub const CURLUE_UNSUPPORTED_SCHEME: CURLUCode = 5; + pub const CURLUE_URLDECODE: CURLUCode = 6; + pub const CURLUE_OUT_OF_MEMORY: CURLUCode = 7; + pub const CURLUE_USER_NOT_ALLOWED: CURLUCode = 8; + pub const CURLUE_UNKNOWN_PART: CURLUCode = 9; + pub const CURLUE_NO_SCHEME: CURLUCode = 10; + pub const CURLUE_NO_USER: CURLUCode = 11; + pub const CURLUE_NO_PASSWORD: CURLUCode = 12; + pub const CURLUE_NO_OPTIONS: CURLUCode = 13; + pub const CURLUE_NO_HOST: CURLUCode = 14; + pub const CURLUE_NO_PORT: CURLUCode = 15; + pub const CURLUE_NO_QUERY: CURLUCode = 16; + pub const CURLUE_NO_FRAGMENT: CURLUCode = 17; + pub const CURLUE_NO_ZONEID: CURLUCode = 18; + pub const CURLUE_BAD_FILE_URL: CURLUCode = 19; + pub const CURLUE_BAD_FRAGMENT: CURLUCode = 20; + pub const CURLUE_BAD_HOSTNAME: CURLUCode = 21; + pub const CURLUE_BAD_IPV6: CURLUCode = 22; + pub const CURLUE_BAD_LOGIN: CURLUCode = 
23; + pub const CURLUE_BAD_PASSWORD: CURLUCode = 24; + pub const CURLUE_BAD_PATH: CURLUCode = 25; + pub const CURLUE_BAD_QUERY: CURLUCode = 26; + pub const CURLUE_BAD_SCHEME: CURLUCode = 27; + pub const CURLUE_BAD_SLASHES: CURLUCode = 28; + pub const CURLUE_BAD_USER: CURLUCode = 29; + pub const CURLUE_LACKS_IDN: CURLUCode = 30; + pub const CURLUE_TOO_LARGE: CURLUCode = 31; + pub const CURLUE_LAST: CURLUCode = 32; + + pub const CURLUPART_URL: CURLUPart = 0; + pub const CURLUPART_SCHEME: CURLUPart = 1; + pub const CURLUPART_USER: CURLUPart = 2; + pub const CURLUPART_PASSWORD: CURLUPart = 3; + pub const CURLUPART_OPTIONS: CURLUPart = 4; + pub const CURLUPART_HOST: CURLUPart = 5; + pub const CURLUPART_PORT: CURLUPart = 6; + pub const CURLUPART_PATH: CURLUPart = 7; + pub const CURLUPART_QUERY: CURLUPart = 8; + pub const CURLUPART_FRAGMENT: CURLUPart = 9; + pub const CURLUPART_ZONEID: CURLUPart = 10; + + pub const CURLU_DEFAULT_PORT: c_uint = 1 << 0; + pub const CURLU_NO_DEFAULT_PORT: c_uint = 1 << 1; + pub const CURLU_DEFAULT_SCHEME: c_uint = 1 << 2; + pub const CURLU_NON_SUPPORT_SCHEME: c_uint = 1 << 3; + pub const CURLU_PATH_AS_IS: c_uint = 1 << 4; + pub const CURLU_DISALLOW_USER: c_uint = 1 << 5; + pub const CURLU_URLDECODE: c_uint = 1 << 6; + pub const CURLU_URLENCODE: c_uint = 1 << 7; + pub const CURLU_APPENDQUERY: c_uint = 1 << 8; + pub const CURLU_GUESS_SCHEME: c_uint = 1 << 9; + pub const CURLU_NO_AUTHORITY: c_uint = 1 << 10; + pub const CURLU_ALLOW_SPACE: c_uint = 1 << 11; + pub const CURLU_PUNYCODE: c_uint = 1 << 12; + pub const CURLU_PUNY2IDN: c_uint = 1 << 13; + pub const CURLU_GET_EMPTY: c_uint = 1 << 14; + pub const CURLU_NO_GUESS_SCHEME: c_uint = 1 << 15; + + pub enum CURLU {} + + extern "C" { + pub fn curl_url() -> *mut CURLU; + pub fn curl_url_cleanup(handle: *mut CURLU); + pub fn curl_url_dup(in_: *const CURLU) -> *mut CURLU; + pub fn curl_url_get( + handle: *const CURLU, + what: CURLUPart, + part: *mut *mut c_char, + flags: c_uint, + ) -> CURLUCode; + 
pub fn curl_url_set( + handle: *mut CURLU, + what: CURLUPart, + part: *const c_char, + flags: c_uint, + ) -> CURLUCode; + pub fn curl_url_strerror(code: CURLUCode) -> *const c_char; + } +} +#[cfg(feature = "url")] +pub use url::*; + pub fn rust_crate_version() -> &'static str { env!("CARGO_PKG_VERSION") } From 0056b0c8b49f8bb44f3fac0a48ed8622e8346624 Mon Sep 17 00:00:00 2001 From: bdbai Date: Tue, 3 Jun 2025 17:06:37 +0800 Subject: [PATCH 2/3] add url --- Cargo.toml | 1 + curl-sys/lib.rs | 77 ++++---- src/lib.rs | 2 + src/url.rs | 282 ++++++++++++++++++++++++++++++ src/url/error.rs | 84 +++++++++ src/url/get_flags.rs | 125 +++++++++++++ src/url/handle.rs | 408 +++++++++++++++++++++++++++++++++++++++++++ src/url/set_flags.rs | 104 +++++++++++ 8 files changed, 1045 insertions(+), 38 deletions(-) create mode 100644 src/url.rs create mode 100644 src/url/error.rs create mode 100644 src/url/get_flags.rs create mode 100644 src/url/handle.rs create mode 100644 src/url/set_flags.rs diff --git a/Cargo.toml b/Cargo.toml index a3efedc3a..68a0c2d5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ zlib-ng-compat = ["curl-sys/zlib-ng-compat", "static-curl"] upkeep_7_62_0 = ["curl-sys/upkeep_7_62_0"] poll_7_68_0 = ["curl-sys/poll_7_68_0"] ntlm = ["curl-sys/ntlm"] +url = ["curl-sys/url"] [[test]] name = "atexit" diff --git a/curl-sys/lib.rs b/curl-sys/lib.rs index 4ebce60d4..6f77f9a46 100644 --- a/curl-sys/lib.rs +++ b/curl-sys/lib.rs @@ -1171,42 +1171,42 @@ extern "C" { mod url { use super::*; - pub type CURLUCode = __enum_ty; + pub type CURLUcode = __enum_ty; pub type CURLUPart = __enum_ty; - pub const CURLUE_OK: CURLUCode = 0; - pub const CURLUE_BAD_HANDLE: CURLUCode = 1; - pub const CURLUE_BAD_PARTPOINTER: CURLUCode = 2; - pub const CURLUE_MALFORMED_INPUT: CURLUCode = 3; - pub const CURLUE_BAD_PORT_NUMBER: CURLUCode = 4; - pub const CURLUE_UNSUPPORTED_SCHEME: CURLUCode = 5; - pub const CURLUE_URLDECODE: CURLUCode = 6; - pub const CURLUE_OUT_OF_MEMORY: CURLUCode = 7; - 
pub const CURLUE_USER_NOT_ALLOWED: CURLUCode = 8; - pub const CURLUE_UNKNOWN_PART: CURLUCode = 9; - pub const CURLUE_NO_SCHEME: CURLUCode = 10; - pub const CURLUE_NO_USER: CURLUCode = 11; - pub const CURLUE_NO_PASSWORD: CURLUCode = 12; - pub const CURLUE_NO_OPTIONS: CURLUCode = 13; - pub const CURLUE_NO_HOST: CURLUCode = 14; - pub const CURLUE_NO_PORT: CURLUCode = 15; - pub const CURLUE_NO_QUERY: CURLUCode = 16; - pub const CURLUE_NO_FRAGMENT: CURLUCode = 17; - pub const CURLUE_NO_ZONEID: CURLUCode = 18; - pub const CURLUE_BAD_FILE_URL: CURLUCode = 19; - pub const CURLUE_BAD_FRAGMENT: CURLUCode = 20; - pub const CURLUE_BAD_HOSTNAME: CURLUCode = 21; - pub const CURLUE_BAD_IPV6: CURLUCode = 22; - pub const CURLUE_BAD_LOGIN: CURLUCode = 23; - pub const CURLUE_BAD_PASSWORD: CURLUCode = 24; - pub const CURLUE_BAD_PATH: CURLUCode = 25; - pub const CURLUE_BAD_QUERY: CURLUCode = 26; - pub const CURLUE_BAD_SCHEME: CURLUCode = 27; - pub const CURLUE_BAD_SLASHES: CURLUCode = 28; - pub const CURLUE_BAD_USER: CURLUCode = 29; - pub const CURLUE_LACKS_IDN: CURLUCode = 30; - pub const CURLUE_TOO_LARGE: CURLUCode = 31; - pub const CURLUE_LAST: CURLUCode = 32; + pub const CURLUE_OK: CURLUcode = 0; + pub const CURLUE_BAD_HANDLE: CURLUcode = 1; + pub const CURLUE_BAD_PARTPOINTER: CURLUcode = 2; + pub const CURLUE_MALFORMED_INPUT: CURLUcode = 3; + pub const CURLUE_BAD_PORT_NUMBER: CURLUcode = 4; + pub const CURLUE_UNSUPPORTED_SCHEME: CURLUcode = 5; + pub const CURLUE_URLDECODE: CURLUcode = 6; + pub const CURLUE_OUT_OF_MEMORY: CURLUcode = 7; + pub const CURLUE_USER_NOT_ALLOWED: CURLUcode = 8; + pub const CURLUE_UNKNOWN_PART: CURLUcode = 9; + pub const CURLUE_NO_SCHEME: CURLUcode = 10; + pub const CURLUE_NO_USER: CURLUcode = 11; + pub const CURLUE_NO_PASSWORD: CURLUcode = 12; + pub const CURLUE_NO_OPTIONS: CURLUcode = 13; + pub const CURLUE_NO_HOST: CURLUcode = 14; + pub const CURLUE_NO_PORT: CURLUcode = 15; + pub const CURLUE_NO_QUERY: CURLUcode = 16; + pub const CURLUE_NO_FRAGMENT: 
CURLUcode = 17; + pub const CURLUE_NO_ZONEID: CURLUcode = 18; + pub const CURLUE_BAD_FILE_URL: CURLUcode = 19; + pub const CURLUE_BAD_FRAGMENT: CURLUcode = 20; + pub const CURLUE_BAD_HOSTNAME: CURLUcode = 21; + pub const CURLUE_BAD_IPV6: CURLUcode = 22; + pub const CURLUE_BAD_LOGIN: CURLUcode = 23; + pub const CURLUE_BAD_PASSWORD: CURLUcode = 24; + pub const CURLUE_BAD_PATH: CURLUcode = 25; + pub const CURLUE_BAD_QUERY: CURLUcode = 26; + pub const CURLUE_BAD_SCHEME: CURLUcode = 27; + pub const CURLUE_BAD_SLASHES: CURLUcode = 28; + pub const CURLUE_BAD_USER: CURLUcode = 29; + pub const CURLUE_LACKS_IDN: CURLUcode = 30; + pub const CURLUE_TOO_LARGE: CURLUcode = 31; + pub const CURLUE_LAST: CURLUcode = 32; pub const CURLUPART_URL: CURLUPart = 0; pub const CURLUPART_SCHEME: CURLUPart = 1; @@ -1218,7 +1218,7 @@ mod url { pub const CURLUPART_PATH: CURLUPart = 7; pub const CURLUPART_QUERY: CURLUPart = 8; pub const CURLUPART_FRAGMENT: CURLUPart = 9; - pub const CURLUPART_ZONEID: CURLUPart = 10; + pub const CURLUPART_ZONEID: CURLUPart = 10; // Added in 7.65.0 pub const CURLU_DEFAULT_PORT: c_uint = 1 << 0; pub const CURLU_NO_DEFAULT_PORT: c_uint = 1 << 1; @@ -1248,14 +1248,15 @@ mod url { what: CURLUPart, part: *mut *mut c_char, flags: c_uint, - ) -> CURLUCode; + ) -> CURLUcode; pub fn curl_url_set( handle: *mut CURLU, what: CURLUPart, part: *const c_char, flags: c_uint, - ) -> CURLUCode; - pub fn curl_url_strerror(code: CURLUCode) -> *const c_char; + ) -> CURLUcode; + // Added in 7.80.0 + // pub fn curl_url_strerror(code: CURLUCode) -> *const c_char; } } #[cfg(feature = "url")] diff --git a/src/lib.rs b/src/lib.rs index 2965e2bed..ec6bd4e66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,6 +69,8 @@ mod version; pub mod easy; pub mod multi; mod panic; +#[cfg(feature = "url")] +pub mod url; #[cfg(test)] static INITIALIZED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false); diff --git a/src/url.rs b/src/url.rs new file mode 100644 index 
000000000..c639a6afb --- /dev/null +++ b/src/url.rs @@ -0,0 +1,282 @@ +//! libcurl URL parsing, generation, and manipulation. + +mod error; +mod get_flags; +mod handle; +mod set_flags; + +pub use error::Error; +pub use get_flags::GetFlags; +pub use handle::Url; +pub use set_flags::SetFlags; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_url_new() { + let mut url = Url::new().unwrap(); + url.set_url("https://www.rust-lang.org/", SetFlags::new()) + .unwrap(); + let full_url = url.get_url(GetFlags::new()).unwrap(); + assert_eq!(full_url, "https://www.rust-lang.org/"); + url.clear_url().unwrap(); + } + + fn test_component_set_get_clear( + val: &str, + set: fn(&mut Url, &str, SetFlags) -> Result<(), Error>, + get: fn(&Url, GetFlags) -> Result, Error>, + clear: fn(&mut Url) -> Result<(), Error>, + ) { + let mut url = Url::new().unwrap(); + set(&mut url, val, SetFlags::new()).unwrap(); + let retrieved = get(&url, GetFlags::new()).unwrap(); + assert_eq!(retrieved, Some(val.into())); + clear(&mut url).unwrap(); + let retrieved = get(&url, GetFlags::new()).unwrap(); + assert_eq!(retrieved, None); + } + + #[test] + fn test_url_parts() { + test_component_set_get_clear("http", Url::set_scheme, Url::get_scheme, Url::clear_scheme); + test_component_set_get_clear("alex", Url::set_user, Url::get_user, Url::clear_user); + test_component_set_get_clear( + "secret", + Url::set_password, + Url::get_password, + Url::clear_password, + ); + test_component_set_get_clear( + "opt", + Url::set_options, + Url::get_options, + Url::clear_options, + ); + test_component_set_get_clear( + "www.rust-lang.org", + Url::set_host, + Url::get_host, + Url::clear_host, + ); + test_component_set_get_clear( + "eth0", + Url::set_zone_id, + Url::get_zone_id, + Url::clear_zone_id, + ); + + let mut url = Url::new().unwrap(); + url.set_port(10086, SetFlags::new()).unwrap(); + let port = url.get_port(GetFlags::new()).unwrap(); + assert_eq!(port, Some(10086)); + url.clear_port().unwrap(); + let 
port = url.get_port(GetFlags::new()).unwrap(); + assert_eq!(port, None); + + url.set_path("/community", SetFlags::new()).unwrap(); + let path = url.get_path(GetFlags::new()).unwrap(); + assert_eq!(path, "/community".to_string()); + url.set_path("", SetFlags::new()).unwrap(); + let path = url.get_path(GetFlags::new()).unwrap(); + assert_eq!(path, "/".to_string()); + + test_component_set_get_clear("a=1&b=2", Url::set_query, Url::get_query, Url::clear_query); + test_component_set_get_clear( + "fragment", + Url::set_fragment, + Url::get_fragment, + Url::clear_fragment, + ); + } + + #[test] + fn test_url_parser() { + let mut url = Url::new().unwrap(); + url.set_url( + "https://user:password@[::1%eth0]:8080/path?query#fragment", + SetFlags::new(), + ) + .unwrap(); + + let empty_flags = GetFlags::new(); + assert_eq!(url.get_scheme(empty_flags).unwrap(), Some("https".into())); + assert_eq!(url.get_user(empty_flags).unwrap(), Some("user".into())); + assert_eq!( + url.get_password(empty_flags).unwrap(), + Some("password".into()) + ); + assert_eq!(url.get_host(empty_flags).unwrap(), Some("[::1]".into())); + assert_eq!(url.get_port(empty_flags).unwrap(), Some(8080)); + assert_eq!(url.get_path(empty_flags).unwrap(), "/path".to_string()); + assert_eq!(url.get_query(empty_flags).unwrap(), Some("query".into())); + assert_eq!( + url.get_fragment(empty_flags).unwrap(), + Some("fragment".into()) + ); + assert_eq!(url.get_zone_id(empty_flags).unwrap(), Some("eth0".into())); + assert_eq!( + url.get_url(empty_flags).unwrap(), + "https://user:password@[::1%25eth0]:8080/path?query#fragment" + ); + } + + #[test] + fn test_url_get_flags() { + let mut url = Url::new().unwrap(); + url.set_url("https://www.rust-lang.org/", SetFlags::new()) + .unwrap(); + assert_eq!(url.get_port(GetFlags::new()).unwrap(), None); + assert_eq!( + url.get_port(GetFlags::new().default_port()).unwrap(), + Some(443) + ); + + url.clear_scheme().unwrap(); + url.set_host("www.rust-lang.org", SetFlags::new()).unwrap(); + 
assert_eq!( + url.get_url(GetFlags::new()).unwrap_err().code(), + curl_sys::CURLUE_NO_SCHEME + ); + assert_eq!( + url.get_url(GetFlags::new().default_scheme()).unwrap(), + "https://www.rust-lang.org/" + ); + + url.set_url("https://www.rust-lang.org:443/", SetFlags::new()) + .unwrap(); + assert_eq!( + url.get_url(GetFlags::new()).unwrap(), + "https://www.rust-lang.org:443/" + ); + assert_eq!( + url.get_url(GetFlags::new().no_default_port()).unwrap(), + "https://www.rust-lang.org/" + ); + + url.set_url("https://www.rust-lang.org/a%20b", SetFlags::new()) + .unwrap(); + assert_eq!(url.get_path(GetFlags::new()).unwrap(), "/a%20b"); + assert_eq!(url.get_path(GetFlags::new().urldecode()).unwrap(), "/a b"); + + url.set_host("茹斯特", SetFlags::new()).unwrap(); + assert_eq!( + url.get_host(GetFlags::new().urlencode()).unwrap(), + Some("%e8%8c%b9%e6%96%af%e7%89%b9".into()) + ); + + // FIXME: IDN support may not be available in all builds of libcurl. + // assert_eq!( + // url.get_host(GetFlags::new().punycode()).unwrap(), + // Some("xn--dfvq8zr5m".into()) + // ); + + // url.set_host("xn--dfvq8zr5m", SetFlags::new()).unwrap(); + // assert_eq!( + // url.get_host(GetFlags::new().punycode2idn()).unwrap(), + // Some("茹斯特".into()) + // ); + + url.set_query("", SetFlags::new()).unwrap(); + assert_eq!(url.get_query(GetFlags::new()).unwrap(), None); + assert_eq!( + url.get_query(GetFlags::new().get_empty()).unwrap(), + Some("".into()) + ); + } + + #[test] + fn test_url_guess_scheme() { + let mut url = Url::new().unwrap(); + url.set_url("www.rust-lang.org", SetFlags::new().guess_scheme()) + .unwrap(); + + assert_eq!( + url.get_scheme(GetFlags::new().no_guess_scheme()).unwrap(), + None + ); + assert_eq!( + url.get_scheme(GetFlags::new()).unwrap(), + Some("http".into()) + ); + } + + #[test] + fn test_url_set_flags() { + let mut url = Url::new().unwrap(); + url.set_query("a=1", SetFlags::new()).unwrap(); + + url.set_query("b=2", SetFlags::new().append_query()) + .unwrap(); + assert_eq!( + 
url.get_query(GetFlags::new()).unwrap(), + Some("a=1&b=2".into()) + ); + + assert_eq!( + url.set_scheme("rust", SetFlags::new()).unwrap_err().code(), + curl_sys::CURLUE_UNSUPPORTED_SCHEME + ); + assert!(url + .set_scheme("rust", SetFlags::new().non_support_scheme()) + .is_ok()); + + url.set_path("/a b", SetFlags::new()).unwrap(); + assert_eq!(url.get_path(GetFlags::new()).unwrap(), "/a b"); + url.set_path("/a b", SetFlags::new().urlencode()).unwrap(); + assert_eq!(url.get_path(GetFlags::new()).unwrap(), "/a%20b"); + + assert_eq!( + url.set_url("www.rust-lang.org", SetFlags::new()) + .unwrap_err() + .code(), + curl_sys::CURLUE_BAD_SCHEME + ); + assert!(url + .set_url("www.rust-lang.org", SetFlags::new().default_scheme()) + .is_ok()); + assert_eq!( + url.get_url(GetFlags::new()).unwrap(), + "https://www.rust-lang.org/" + ); + + url.clear_url().unwrap(); + assert!(url + .set_url("www.rust-lang.org", SetFlags::new().guess_scheme()) + .is_ok()); + assert_eq!( + url.get_url(GetFlags::new()).unwrap(), + "http://www.rust-lang.org/" + ); + + assert_eq!( + url.set_url("http:///", SetFlags::new()).unwrap_err().code(), + curl_sys::CURLUE_NO_HOST + ); + assert!(url + .set_url("http:///", SetFlags::new().no_authority()) + .is_ok()); + + url.set_url("https://www.rust-lang.org/././foo", SetFlags::new()) + .unwrap(); + assert_eq!(url.get_path(GetFlags::new()).unwrap(), "/foo"); + url.set_url( + "https://www.rust-lang.org/././foo", + SetFlags::new().path_as_is(), + ) + .unwrap(); + assert_eq!(url.get_path(GetFlags::new()).unwrap(), "/././foo"); + + assert!(url + .set_url("https://www.rust-lang.org/ a", SetFlags::new()) + .is_err()); + assert!(url + .set_url( + "https://www.rust-lang.org/ a", + SetFlags::new().allow_space() + ) + .is_ok()); + } +} diff --git a/src/url/error.rs b/src/url/error.rs new file mode 100644 index 000000000..229f80631 --- /dev/null +++ b/src/url/error.rs @@ -0,0 +1,84 @@ +use std::{ + fmt::{self}, + io, +}; + +/// Represents an error that can occur when 
working with URLs in libcurl. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct Error { + code: curl_sys::CURLUcode, +} + +impl Error { + /// Creates a new `Error` from a `CURLUcode`. + pub fn new(code: curl_sys::CURLUcode) -> Self { + Self { code } + } + + /// Returns the underlying `CURLUcode` of the error. + pub fn code(&self) -> curl_sys::CURLUcode { + self.code + } + + /// Returns a string representation of the error. + pub fn description(&self) -> &'static str { + use curl_sys::*; + + match self.code { + CURLUE_OK => "No error", + CURLUE_BAD_HANDLE => "An invalid CURLU pointer was passed as argument", + CURLUE_BAD_PARTPOINTER => "An invalid 'part' argument was passed as argument", + CURLUE_MALFORMED_INPUT => "Malformed input to a URL function", + CURLUE_BAD_PORT_NUMBER => "Port number was not a decimal number between 0 and 65535", + CURLUE_UNSUPPORTED_SCHEME => "Unsupported URL scheme", + CURLUE_URLDECODE => "URL decode error, most likely because of rubbish in the input", + CURLUE_OUT_OF_MEMORY => "A memory function failed", + CURLUE_USER_NOT_ALLOWED => "Credentials was passed in the URL when prohibited", + CURLUE_UNKNOWN_PART => "An unknown part ID was passed to a URL API function", + CURLUE_NO_SCHEME => "No scheme part in the URL", + CURLUE_NO_USER => "No user part in the URL", + CURLUE_NO_PASSWORD => "No password part in the URL", + CURLUE_NO_OPTIONS => "No options part in the URL", + CURLUE_NO_HOST => "No host part in the URL", + CURLUE_NO_PORT => "No port part in the URL", + CURLUE_NO_QUERY => "No query part in the URL", + CURLUE_NO_FRAGMENT => "No fragment part in the URL", + CURLUE_NO_ZONEID => "No zoneid part in the URL", + CURLUE_BAD_LOGIN => "Bad login part", + CURLUE_BAD_IPV6 => "Bad IPv6 address", + CURLUE_BAD_HOSTNAME => "Bad hostname", + CURLUE_BAD_FILE_URL => "Bad file:// URL", + CURLUE_BAD_SLASHES => "Unsupported number of slashes following scheme", + CURLUE_BAD_SCHEME => "Bad scheme", + CURLUE_BAD_PATH => "Bad path", + 
CURLUE_BAD_FRAGMENT => "Bad fragment", + CURLUE_BAD_QUERY => "Bad query", + CURLUE_BAD_PASSWORD => "Bad password", + CURLUE_BAD_USER => "Bad user", + CURLUE_LACKS_IDN => "libcurl lacks IDN support", + CURLUE_TOO_LARGE => "A value or data field is larger than allowed", + _ => "Error", + } + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Error") + .field("code", &self.code) + .field("description", &self.description()) + .finish() + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.description().fmt(f) + } +} + +impl From for io::Error { + fn from(err: Error) -> Self { + io::Error::new(io::ErrorKind::Other, err.description()) + } +} diff --git a/src/url/get_flags.rs b/src/url/get_flags.rs new file mode 100644 index 000000000..b2ae054cf --- /dev/null +++ b/src/url/get_flags.rs @@ -0,0 +1,125 @@ +use std::ffi::c_uint; +use std::fmt; + +/// A set of options that can be used with `Url::get*` methods to modify the +/// behavior of URL retrieval. +#[derive(Clone, Copy, Default, PartialEq)] +pub struct GetFlags(pub(super) c_uint); + +impl GetFlags { + /// Creates a new [`GetFlags`] instance with no options set. + pub const fn new() -> Self { + Self(0) + } + + /// If the handle has no port stored, this option makes curl_url_get return + /// the default port for the used scheme. + pub const fn default_port(&self) -> Self { + Self(self.0 | curl_sys::CURLU_DEFAULT_PORT) + } + + /// If the handle has no scheme stored, this option makes curl_url_get + /// return the default scheme instead of error. + pub const fn default_scheme(&self) -> Self { + Self(self.0 | curl_sys::CURLU_DEFAULT_SCHEME) + } + + /// Instructs libcurl to not return a port number if it matches the + /// default port for the scheme. 
+ pub const fn no_default_port(&self) -> Self { + Self(self.0 | curl_sys::CURLU_NO_DEFAULT_PORT) + } + + /// Asks libcurl to URL decode the contents before returning it. It does + /// not decode the scheme, the port number or the full URL. + /// + /// The query component also gets plus-to-space conversion as a bonus when + /// this bit is set. + /// + /// Note that this URL decoding is charset unaware and you get a zero + /// terminated string back with data that could be intended for a + /// particular encoding. + /// + /// If there are byte values lower than 32 in the decoded string, the get + /// operation returns an error instead. + pub const fn urldecode(&self) -> Self { + Self(self.0 | curl_sys::CURLU_URLDECODE) + } + + /// If set, libcurl encodes the hostname part when a full URL is retrieved. + /// If not set (default), libcurl returns the URL with the hostname raw to + /// support IDN names to appear as-is. IDN hostnames are typically using + /// non-ASCII bytes that otherwise gets percent-encoded. + /// + /// Note that even when not asking for URL encoding, the '%' (byte 37) is + /// URL encoded to make sure the hostname remains valid. + pub const fn urlencode(&self) -> Self { + Self(self.0 | curl_sys::CURLU_URLENCODE) + } + + /// If set and [`GetFlags::urlencode()`] is not set, and asked to + /// retrieve the host or full URL parts, libcurl returns the host name in + /// its punycode version if it contains any non-ASCII octets (and is an + /// IDN name). + /// + /// If libcurl is built without IDN capabilities, using this bit makes + /// curl_url_get return `LACKS_IDN` if the hostname contains anything + /// outside the ASCII range. + /// + /// Added in curl 7.88.0. 
+ pub const fn punycode(&self) -> Self { + Self(self.0 | curl_sys::CURLU_PUNYCODE) + } + + /// If set and asked to retrieve the CURLUPART_HOST or CURLUPART_URL parts, + /// libcurl returns the hostname in its IDN (International Domain Name) + /// UTF-8 version if it otherwise is a punycode version. If the punycode + /// name cannot be converted to IDN correctly, libcurl returns + /// `CURLUE_BAD_HOSTNAME`. + /// + /// If libcurl is built without IDN capabilities, using this bit makes + /// libcurl return `CURLUE_LACKS_IDN` if the hostname is using punycode. + /// + /// Added in curl 8.3.0 + pub const fn punycode2idn(&self) -> Self { + Self(self.0 | curl_sys::CURLU_PUNY2IDN) + } + + /// When this flag is used, it makes the function return empty query and + /// fragments parts or when used in the full URL. By default, libcurl + /// otherwise considers empty parts non-existing. + /// + /// An empty query part is one where this is nothing following the question + /// mark (before the possible fragment). An empty fragments part is one + /// where there is nothing following the hash sign. + /// + /// Added in curl 8.8.0 + pub const fn get_empty(&self) -> Self { + Self(self.0 | curl_sys::CURLU_GET_EMPTY) + } + + /// When this flag is used, it treats the scheme as non-existing if it was + /// set as a result of a previous guess; when `guess_scheme` was used + /// parsing a URL. + /// + /// Using this flag when getting scheme if the scheme was set as the result + /// of a guess makes libcurl return `CURLUE_NO_SCHEME`. + /// + /// Using this flag when getting full URL if the scheme was set as the + /// result of a guess makes libcurl return the full URL without the scheme + /// component. Such a URL can then only be parsed with `set_url` if + /// `CURLU_GUESS_SCHEME` is used. 
+ /// + /// Added in curl 8.9.0 + pub const fn no_guess_scheme(&self) -> Self { + Self(self.0 | curl_sys::CURLU_NO_GUESS_SCHEME) + } +} + +impl fmt::Debug for GetFlags { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GetFlags") + .field("value", &format!("{:032b}", self.0).as_str()) + .finish() + } +} diff --git a/src/url/handle.rs b/src/url/handle.rs new file mode 100644 index 000000000..1aafd4ebf --- /dev/null +++ b/src/url/handle.rs @@ -0,0 +1,408 @@ +use std::ffi::{c_char, c_void}; +use std::fmt; +use std::ptr::null; + +use curl_sys::CURLU; + +use super::{Error, GetFlags, SetFlags}; + +/// A libcurl URL object that holds or can hold URL components for a single URL +pub struct Url { + raw: *mut CURLU, +} + +impl Url { + /// Allocate a new URL object. + pub fn new() -> Result { + let raw = unsafe { curl_sys::curl_url() }; + assert!(!raw.is_null()); + Ok(Self { raw }) + } + + /// Return the raw pointer to the underlying CURLU handle. + pub fn as_raw(&self) -> *mut CURLU { + self.raw + } + + fn ffi_set( + &mut self, + part: curl_sys::CURLUPart, + value: Option<&str>, + flags: SetFlags, + ) -> Result<(), Error> { + let c_value = value + .map(|value| { + std::ffi::CString::new(value) + .map_err(|_| Error::new(curl_sys::CURLUE_MALFORMED_INPUT)) + }) + .transpose()?; + let ptr = c_value.as_ref().map(|s| s.as_ptr()).unwrap_or(null()); + let code = unsafe { curl_sys::curl_url_set(self.raw, part, ptr, flags.0) }; + cvt(code) + } + + fn ffi_get( + &self, + part: curl_sys::CURLUPart, + flags: GetFlags, + allowing: curl_sys::CURLUcode, + ) -> Result, Error> { + let mut curl_str: *mut c_char = std::ptr::null_mut(); + let code = unsafe { curl_sys::curl_url_get(self.raw, part, &mut curl_str, flags.0) }; + if code != allowing { + cvt(code)?; + } + if curl_str.is_null() { + return Ok(None); + } + struct CurlStr(*mut c_char); + impl Drop for CurlStr { + fn drop(&mut self) { + unsafe { curl_sys::curl_free(self.0 as *mut c_void) }; + } + } + let 
curl_str = CurlStr(curl_str); + let res = unsafe { std::ffi::CStr::from_ptr(curl_str.0) }.to_str(); + match res { + Ok(s) => Ok(Some(s.to_owned())), + Err(_) => Err(Error::new(curl_sys::CURLUE_MALFORMED_INPUT)), + } + } + + /// Set the URL to empty. + pub fn clear_url(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_URL, None, SetFlags::new()) + } + + /// Replace the full URL. If the URL object is already populated, the new + /// URL can be relative to the previous. + /// + /// When successfully setting a new URL, relative or absolute, the URL + /// content is replaced with the components of the newly set URL. + /// + /// The input `url` must point to a correctly formatted "RFC 3986+" URL. + /// The URL parser only understands and parses the subset of URLS that are + /// "hierarchical" and therefore contain a `://` separator - not the ones + /// that are normally specified with only a colon separator. + /// + /// By default this API only parses URLs using schemes for protocols that + /// are supported built-in. To make libcurl parse URLs generically even for + /// schemes it does not know about, the + /// [`SetFlags::non_support_scheme()`] option must be set. Otherwise, + /// this function returns `UNSUPPORTED_SCHEME` for URL schemes it does not + /// recognize. + /// + /// Unless [`SetFlags::no_authority()`] is set, a blank hostname is not + /// allowed in the URL. + /// + /// When a full URL is set (parsed), the hostname component is stored URL + /// decoded. + /// + /// It is considered fine to set a blank URL (`""`) as a redirect, but not + /// as a normal URL. Therefore, setting a `""`` URL works fine if the + /// handle already holds a URL, otherwise it triggers an error. + pub fn set_url(&mut self, url: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_URL, Some(url), flags) + } + + /// Return the slightly cleaned up version of full URL using all available parts. 
+ /// + /// We advise using the [`GetFlags::punycode()`] option to get the URL as + /// "normalized" as possible since IDN allows hostnames to be written in + /// many different ways that still end up the same punycode version. + /// + /// Zero-length queries and fragments are excluded from the URL unless + /// [`GetFlags::get_empty()`] is set. + pub fn get_url(&self, flags: GetFlags) -> Result { + let url = self.ffi_get(curl_sys::CURLUPART_URL, flags, curl_sys::CURLUE_OK)?; + Ok(url.unwrap_or_default()) + } + + /// Get the scheme part of the URL. + /// + /// Scheme cannot be URL decoded on set. libcurl only accepts setting + /// schemes up to 40 bytes long. + pub fn get_scheme(&self, flags: GetFlags) -> Result, Error> { + self.ffi_get( + curl_sys::CURLUPART_SCHEME, + flags, + curl_sys::CURLUE_NO_SCHEME, + ) + } + + /// Set the scheme part of the URL. + /// + /// Scheme cannot be URL decoded on set. libcurl only accepts setting + /// schemes up to 40 bytes long. + pub fn set_scheme(&mut self, scheme: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_SCHEME, Some(scheme), flags) + } + + /// Clear the scheme part of the URL. + pub fn clear_scheme(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_SCHEME, None, SetFlags::new()) + } + + /// Get the user part of the URL. + pub fn get_user(&self, flags: GetFlags) -> Result, Error> { + self.ffi_get(curl_sys::CURLUPART_USER, flags, curl_sys::CURLUE_NO_USER) + } + + /// Set the user part of the URL. + /// + /// If only the user part is set and not the password, the URL is + /// represented with a blank password. + pub fn set_user(&mut self, user: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_USER, Some(user), flags) + } + + /// Clear the user part of the URL. + pub fn clear_user(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_USER, None, SetFlags::new()) + } + + /// Get the password part of the URL. 
+ pub fn get_password(&self, flags: GetFlags) -> Result, Error> { + self.ffi_get( + curl_sys::CURLUPART_PASSWORD, + flags, + curl_sys::CURLUE_NO_PASSWORD, + ) + } + + /// Set the password part of the URL. + /// + /// If only the password part is set and not the user, the URL is + /// represented with a blank user. + pub fn set_password(&mut self, password: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_PASSWORD, Some(password), flags) + } + + /// Clear the password part of the URL. + pub fn clear_password(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_PASSWORD, None, SetFlags::new()) + } + + /// Get the options part of the URL. + /// + /// The options field is an optional field that might follow the password + /// in the userinfo part. It is only recognized/used when parsing URLs for + /// the following schemes: pop3, smtp and imap. The URL API still allows + /// users to set and get this field independently of scheme when not + /// parsing full URLs. + pub fn get_options(&self, flags: GetFlags) -> Result, Error> { + self.ffi_get( + curl_sys::CURLUPART_OPTIONS, + flags, + curl_sys::CURLUE_NO_OPTIONS, + ) + } + + /// Set the options part of the URL. + /// + /// The options field is an optional field that might follow the password + /// in the userinfo part. It is only recognized/used when parsing URLs for + /// the following schemes: pop3, smtp and imap. This function however allows + /// users to independently set this field. + pub fn set_options(&mut self, options: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_OPTIONS, Some(options), flags) + } + + /// Clear the options part of the URL. + pub fn clear_options(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_OPTIONS, None, SetFlags::new()) + } + + /// Get the host part of the URL. + /// + /// If it is an IPv6 numeric address, the zone id is not part of it but is provided separately in CURLUPART_ZONEID. 
IPv6 numerical addresses are returned within brackets ([]). + /// + /// IPv6 names are normalized when set, which should make them as short as possible while maintaining correct syntax. + pub fn get_host(&self, flags: GetFlags) -> Result<Option<String>, Error> { + self.ffi_get(curl_sys::CURLUPART_HOST, flags, curl_sys::CURLUE_NO_HOST) + } + + /// Set the host part of the URL. + /// + /// If it is an International Domain Name (IDN), the string must be + /// encoded as your locale says or UTF-8 (when WinIDN is used). If it is a + /// bracketed IPv6 numeric address it may contain a zone id (or you can use + /// [`Url::set_zone_id()`]). + /// + /// Note that if you set an IPv6 address, it gets ruined and causes an + /// error if you also set [`SetFlags::urlencode()`]. + /// + /// Unless [`SetFlags::no_authority()`] is set, a blank hostname cannot be + /// set. + pub fn set_host(&mut self, host: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_HOST, Some(host), flags) + } + + /// Clear the host part of the URL. + pub fn clear_host(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_HOST, None, SetFlags::new()) + } + + /// Get the zone id part of the URL. + /// + /// If the hostname is a numeric IPv6 address, this field might also be set. + pub fn get_zone_id(&self, flags: GetFlags) -> Result<Option<String>, Error> { + self.ffi_get( + curl_sys::CURLUPART_ZONEID, + flags, + curl_sys::CURLUE_NO_ZONEID, + ) + } + + /// Set the zone id part of the URL. + /// + /// If the hostname is a numeric IPv6 address, this field can also be set. + pub fn set_zone_id(&mut self, zone_id: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_ZONEID, Some(zone_id), flags) + } + + /// Clear the zone id part of the URL. + pub fn clear_zone_id(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_ZONEID, None, SetFlags::new()) + } + + /// Get the port part of the URL. + /// + /// A port cannot be URL decoded on get.
+ pub fn get_port(&self, flags: GetFlags) -> Result, Error> { + let port = self.ffi_get(curl_sys::CURLUPART_PORT, flags, curl_sys::CURLUE_NO_PORT)?; + port.map(|s| { + s.parse::() + .map_err(|_| Error::new(curl_sys::CURLUE_MALFORMED_INPUT)) + }) + .transpose() + } + + /// Set the port part of the URL. + /// + /// The port number cannot be URL encoded on set. + pub fn set_port(&mut self, port: u16, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_PORT, Some(&port.to_string()), flags) + } + + /// Clear the port part of the URL. + pub fn clear_port(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_PORT, None, SetFlags::new()) + } + + /// Get the path part of the URL. + /// + /// The part is always at least a slash ('/') even if no path was supplied + /// in the URL. A URL path always starts with a slash. + pub fn get_path(&self, flags: GetFlags) -> Result { + let path = self.ffi_get(curl_sys::CURLUPART_PATH, flags, curl_sys::CURLUE_OK)?; + Ok(path.unwrap_or_else(|| "/".to_string())) + } + + /// Set the path part of the URL. + /// + /// If a path is set in the URL without a leading slash, a slash is prepended automatically. + pub fn set_path(&mut self, path: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_PATH, Some(path), flags) + } + + /// Get the query part of the URL. + /// + /// The initial question mark that denotes the beginning of the query part + /// is a delimiter only. It is not part of the query contents. + /// + /// A not-present query returns [`None`]. + /// + /// A zero-length query returns part as [`None`] unless + /// [`GetFlags::get_empty()`] is set. + /// + /// The query part gets pluses converted to space when asked to URL decode + /// on get with [`GetFlags::urldecode()`] set. + pub fn get_query(&self, flags: GetFlags) -> Result, Error> { + self.ffi_get(curl_sys::CURLUPART_QUERY, flags, curl_sys::CURLUE_NO_QUERY) + } + + /// Set the query part of the URL. 
+ /// + /// The query part gets spaces converted to pluses when asked to URL encode + /// on set with [`SetFlags::urlencode()`]. + /// + /// If used together with [`SetFlags::append_query()`], the provided part is + /// appended to the end of the existing query. + /// + /// The question mark in the URL is not part of the actual query contents. + pub fn set_query(&mut self, query: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_QUERY, Some(query), flags) + } + + /// Clear the query part of the URL. + pub fn clear_query(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_QUERY, None, SetFlags::new()) + } + + /// Get the fragment part of the URL. + /// + /// The initial hash sign that denotes the beginning of the fragment is + /// a delimiter only. It is not part of the fragment contents. + /// + /// A not-present fragment returns [`None`]. + /// + /// A zero-length fragment returns [`None`] unless [`GetFlags::get_empty()`] is set. + pub fn get_fragment(&self, flags: GetFlags) -> Result<Option<String>, Error> { + self.ffi_get( + curl_sys::CURLUPART_FRAGMENT, + flags, + curl_sys::CURLUE_NO_FRAGMENT, + ) + } + + /// Set the fragment part of the URL. + /// + /// The hash sign in the URL is not part of the actual fragment contents. + pub fn set_fragment(&mut self, fragment: &str, flags: SetFlags) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_FRAGMENT, Some(fragment), flags) + } + + /// Clear the fragment part of the URL.
+ pub fn clear_fragment(&mut self) -> Result<(), Error> { + self.ffi_set(curl_sys::CURLUPART_FRAGMENT, None, SetFlags::new()) + } +} + +impl Clone for Url { + fn clone(&self) -> Self { + let new_handle = unsafe { curl_sys::curl_url_dup(self.raw) }; + Self { raw: new_handle } + } +} + +impl Drop for Url { + fn drop(&mut self) { + unsafe { + curl_sys::curl_url_cleanup(self.raw); + } + } +} + +impl fmt::Display for Url { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.get_url(GetFlags::new()) { + Ok(url) => url.fmt(f), + Err(_) => "".fmt(f), + } + } +} + +impl fmt::Debug for Url { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Url").field("raw", &self.raw).finish() + } +} + +fn cvt(code: curl_sys::CURLUcode) -> Result<(), Error> { + if code == curl_sys::CURLUE_OK { + Ok(()) + } else { + Err(Error::new(code)) + } +} diff --git a/src/url/set_flags.rs b/src/url/set_flags.rs new file mode 100644 index 000000000..3ce1645c2 --- /dev/null +++ b/src/url/set_flags.rs @@ -0,0 +1,104 @@ +use std::ffi::c_uint; +use std::fmt; + +/// A set of options that can be used with `Url::get*` methods to modify the +/// behavior of URL retrieval. +#[derive(Clone, Copy, Default, PartialEq)] +pub struct SetFlags(pub(super) c_uint); + +impl SetFlags { + /// Creates a new [`SetFlags`] instance with no options set. + pub const fn new() -> Self { + Self(0) + } + + /// If the handle has no port stored, this option makes curl_url_get return + /// the default port for the used scheme. + pub const fn append_query(&self) -> Self { + Self(self.0 | curl_sys::CURLU_APPENDQUERY) + } + + /// If set, allows libcurl to set a non-supported scheme. It then of course + /// course cannot know if the provided scheme is a valid one or not. + pub const fn non_support_scheme(&self) -> Self { + Self(self.0 | curl_sys::CURLU_NON_SUPPORT_SCHEME) + } + + /// When set, libcurl URL encodes the part on entry, except for scheme, port + /// and URL. 
+ /// + /// When setting the path component with URL encoding enabled, the slash + /// character is skipped (left as-is). + /// + /// The query part gets space-to-plus converted before the URL conversion is + /// applied. + /// + /// This URL encoding is charset unaware and converts the input in a + /// byte-by-byte manner. + pub const fn urlencode(&self) -> Self { + Self(self.0 | curl_sys::CURLU_URLENCODE) + } + + /// If set, allows the URL to be set without a scheme and then uses the + /// default scheme. Takes precedence over [`SetFlags::guess_scheme`]. + pub const fn default_scheme(&self) -> Self { + Self(self.0 | curl_sys::CURLU_DEFAULT_SCHEME) + } + + /// If set, allows the URL to be set without a scheme and it instead + /// "guesses" which scheme was intended based on the hostname. If the + /// outermost subdomain name matches DICT, FTP, IMAP, LDAP, POP3 or SMTP + /// then that scheme is used, otherwise it picks HTTP. Conflicts with the + /// [`SetFlags::default_scheme`] option which takes precedence if both are + /// set. + /// + /// If guessing is not allowed and there is no default scheme set, trying + /// to parse a URL without a scheme returns an error. + /// + /// If the scheme ends up set as a result of guessing, i.e. it is not + /// actually present in the parsed URL, it can later be figured out by + /// using the `no_guess_scheme` flag when subsequently getting the URL or + /// the scheme. + pub const fn guess_scheme(&self) -> Self { + Self(self.0 | curl_sys::CURLU_GUESS_SCHEME) + } + + /// If set, skips authority checks. The RFC allows individual schemes to + /// omit the host part (normally the only mandatory part of the authority), + /// but libcurl cannot know whether this is permitted for custom schemes. + /// Specifying the flag permits empty authority sections, similar to how + /// the file scheme is handled. + pub const fn no_authority(&self) -> Self { + Self(self.0 | curl_sys::CURLU_NO_AUTHORITY) + } + + /// When set for a full URL, this skips the normalization of the path.
That is + /// the procedure where libcurl otherwise removes sequences of dot-slash and + /// dot-dot etc. The same option used for transfers is called + /// `CURLOPT_PATH_AS_IS`. + pub const fn path_as_is(&self) -> Self { + Self(self.0 | curl_sys::CURLU_PATH_AS_IS) + } + + /// If set, the URL parser allows space (ASCII 32) where possible. The URL + /// syntax normally does not allow spaces anywhere; they should be + /// encoded as %20 or '+'. When spaces are allowed, they are still not + /// allowed in the scheme. When space is used and allowed in a URL, it is + /// stored as-is unless [`SetFlags::urlencode`] is also set, which then makes + /// libcurl URL encode the space before it is stored. This affects how the URL + /// is constructed when the `Url::get*` methods are subsequently used to extract + /// the full URL or individual parts. + /// + /// Added in 7.78.0 + pub const fn allow_space(&self) -> Self { + Self(self.0 | curl_sys::CURLU_ALLOW_SPACE) + } +} + +impl fmt::Debug for SetFlags { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SetFlags") + .field("value", &format!("{:032b}", self.0).as_str()) + .finish() + } +} From fd63670496f0668885910f6ad8dfabcbc04e4e6c Mon Sep 17 00:00:00 2001 From: bdbai Date: Tue, 3 Jun 2025 17:13:05 +0800 Subject: [PATCH 3/3] Run url tests in ci --- ci/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/run.sh b/ci/run.sh index 82b547f94..10c9d7434 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -28,6 +28,7 @@ cargo test --target $TARGET --no-run --features static-curl,protocol-ftp,ntlm $f if [ -z "$NO_RUN" ]; then cargo test --target $TARGET $features cargo test --target $TARGET --features static-curl $features + cargo test --target $TARGET --features static-curl,url $features cargo test --target $TARGET --features static-curl,protocol-ftp $features # Note that `-Clink-dead-code` is passed here to suppress `--gc-sections` to