From 20f2d42a200844617e48c499479f6d9480efcd41 Mon Sep 17 00:00:00 2001 From: evalir Date: Wed, 16 Apr 2025 23:16:10 +0200 Subject: [PATCH 1/5] fix(`types`): conform JSON-RPC parsing to spec by properly trimming whitespace and newlines Right now we're technically off spec from JSON-RPC, as we don't trim whitespace or newlines at all from any incoming requests, but we should. This leads to problems like making it more cumbersome to use `curl` to create raw requests to routers that use ajj. `serde_json` handles this just fine. Our issue is when handling the conversion to a partially desered json rpc request. Right now this needs an extra allocation since `bytes`'s `.trim_ascii()` [only trims whitespaces according to its definition, but does not trim newlines at all](https://doc.rust-lang.org/nightly/std/primitive.u8.html#method.is_ascii_whitespace). This kinda sucks, and can be improved as this has an overhead (this would be interesting to benchmark. Closes ENG-996 --- src/types/batch.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++ src/types/req.rs | 25 ++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/src/types/batch.rs b/src/types/batch.rs index 0ae8dea..12a232e 100644 --- a/src/types/batch.rs +++ b/src/types/batch.rs @@ -1,5 +1,6 @@ use crate::types::{Request, RequestError}; use bytes::Bytes; +use core::str; use serde::Deserialize; use serde_json::value::RawValue; use std::ops::Range; @@ -60,6 +61,8 @@ impl TryFrom for InboundData { } debug!("Parsing inbound data"); + let bytes = Bytes::from(str::from_utf8(bytes.as_ref())?.trim().to_owned()); + // Special-case a single request, rejecting invalid JSON. if bytes.starts_with(b"{") { let rv: &RawValue = serde_json::from_slice(bytes.as_ref())?; @@ -90,3 +93,48 @@ impl TryFrom for InboundData { #[derive(Debug, Deserialize)] struct DeserHelper<'a>(#[serde(borrow)] Vec<&'a RawValue>); + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_deser_batch() { + let batch = r#"[ + {"id": 1, "method": "foo", "params": [1, 2, 3]}, + {"id": 2, "method": "bar", "params": [4, 5, 6]} + ]"#; + + let bytes = Bytes::from(batch); + let batch = InboundData::try_from(bytes).unwrap(); + + assert_eq!(batch.len(), 2); + assert!(!batch.single()); + } + + #[test] + fn test_deser_single() { + let single = r#"{"id": 1, "method": "foo", "params": [1, 2, 3]}"#; + + let bytes = Bytes::from(single); + let batch = InboundData::try_from(bytes).unwrap(); + + assert_eq!(batch.len(), 1); + assert!(batch.single()); + } + + #[test] + fn test_deser_single_with_whitespace() { + let single = r#" + + {"id": 1, "method": "foo", "params": [1, 2, 3]} + + "#; + + let bytes = Bytes::from(single); + let batch = InboundData::try_from(bytes).unwrap(); + + assert_eq!(batch.len(), 1); + assert!(batch.single()); + } +} diff --git a/src/types/req.rs b/src/types/req.rs index 16464a2..81e83d9 100644 --- a/src/types/req.rs +++ b/src/types/req.rs @@ -192,6 +192,7 @@ impl Request { #[cfg(test)] mod test { + use crate::types::METHOD_LEN_LIMIT; use super::*; @@ -236,4 +237,28 @@ mod test { assert_eq!(size, METHOD_LEN_LIMIT + 1); } + + #[test] + fn test_with_linebreak() { + let bytes = Bytes::from_static( + r#" + + { "id": 1, + "jsonrpc": "2.0", + "method": "eth_getBalance", + "params": ["0x4444d38c385d0969C64c4C8f996D7536d16c28B9", "latest"] + } + + "# + .as_bytes(), + ); + let req = Request::try_from(bytes).unwrap(); + + assert_eq!(req.id(), Some("1")); + assert_eq!(req.method(), r#"eth_getBalance"#); + assert_eq!( + req.params(), + r#"["0x4444d38c385d0969C64c4C8f996D7536d16c28B9", "latest"]"# + ); + } } From 74df6b4ea8ce937eabd6dfda9c2deff723326b9c Mon Sep 17 00:00:00 2001 From: evalir Date: Wed, 16 Apr 2025 23:26:34 +0200 Subject: [PATCH 2/5] chore: add comment --- src/types/batch.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/types/batch.rs b/src/types/batch.rs index 12a232e..8c139a9 100644 --- a/src/types/batch.rs +++ b/src/types/batch.rs @@ -61,6 +61,11 @@ impl TryFrom for InboundData { } debug!("Parsing inbound data"); + // Trim whitespace from the input bytes. This is necessary to ensure that + // we can parse the input as a JSON string. The JSON spec allows for + // whitespace before and after the JSON string. + // Sadly [`Bytes::trim_ascii`] does not remove linebreaks, so we have to + // convert to a str and trim it. let bytes = Bytes::from(str::from_utf8(bytes.as_ref())?.trim().to_owned()); // Special-case a single request, rejecting invalid JSON. From 422d0ffd6137124dadb2e83516d3d4c14d4dc796 Mon Sep 17 00:00:00 2001 From: evalir Date: Thu, 17 Apr 2025 00:08:19 +0200 Subject: [PATCH 3/5] chore: rewrite approach by just piggybacking off serde_json --- src/types/batch.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/types/batch.rs b/src/types/batch.rs index 8c139a9..5b85add 100644 --- a/src/types/batch.rs +++ b/src/types/batch.rs @@ -1,6 +1,5 @@ use crate::types::{Request, RequestError}; use bytes::Bytes; -use core::str; use serde::Deserialize; use serde_json::value::RawValue; use std::ops::Range; @@ -61,17 +60,9 @@ impl TryFrom for InboundData { } debug!("Parsing inbound data"); - // Trim whitespace from the input bytes. This is necessary to ensure that - // we can parse the input as a JSON string. The JSON spec allows for - // whitespace before and after the JSON string. - // Sadly [`Bytes::trim_ascii`] does not remove linebreaks, so we have to - // convert to a str and trim it. - let bytes = Bytes::from(str::from_utf8(bytes.as_ref())?.trim().to_owned()); - - // Special-case a single request, rejecting invalid JSON. - if bytes.starts_with(b"{") { - let rv: &RawValue = serde_json::from_slice(bytes.as_ref())?; - + // First, check if it's a single request + let rv: &RawValue = serde_json::from_slice(bytes.as_ref())?; + if rv.get().starts_with("{") { let range = find_range!(bytes, rv.get()); return Ok(Self { @@ -82,7 +73,7 @@ impl TryFrom for InboundData { } // Otherwise, parse the batch - let DeserHelper(reqs) = serde_json::from_slice(bytes.as_ref())?; + let DeserHelper(reqs) = serde_json::from_str(rv.get())?; let reqs = reqs .into_iter() .map(|raw| find_range!(bytes, raw.get())) From 84e8bcd0922330038fa85654bb60f719c74ecfae Mon Sep 17 00:00:00 2001 From: James Date: Thu, 17 Apr 2025 11:08:40 -0400 Subject: [PATCH 4/5] fix: use serde_json::Deserializer --- src/types/batch.rs | 109 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 88 insertions(+), 21 deletions(-) diff --git a/src/types/batch.rs b/src/types/batch.rs index 5b85add..9ee745e 100644 --- a/src/types/batch.rs +++ b/src/types/batch.rs @@ -60,40 +60,57 @@ impl TryFrom for InboundData { } debug!("Parsing inbound data"); - // First, check if it's a single request - let rv: &RawValue = serde_json::from_slice(bytes.as_ref())?; - if rv.get().starts_with("{") { - let range = find_range!(bytes, rv.get()); + // We set up the deserializer to read from the byte buffer. + let mut deserializer = serde_json::Deserializer::from_slice(&bytes); + + // If we succesfully deser a batch, we can return it. + if let Ok(reqs) = Vec::<&RawValue>::deserialize(&mut deserializer) { + // `.end()` performs trailing charcter checks + deserializer.end()?; + let reqs = reqs + .into_iter() + .map(|raw| find_range!(bytes, raw.get())) + .collect(); return Ok(Self { bytes, - reqs: vec![range], - single: true, + reqs, + single: false, }); } - // Otherwise, parse the batch - let DeserHelper(reqs) = serde_json::from_str(rv.get())?; - let reqs = reqs - .into_iter() - .map(|raw| find_range!(bytes, raw.get())) - .collect(); + // If it's not a batch, it should be a single request. + let rv = <&RawValue>::deserialize(&mut deserializer)?; + + // `.end()` performs trailing charcter checks + deserializer.end()?; + + // If not a JSON object, return an error. + if !rv.get().starts_with("{") { + return Err(RequestError::UnexpectedJsonType); + } + + let range = find_range!(bytes, rv.get()); Ok(Self { bytes, - reqs, - single: false, + reqs: vec![range], + single: true, }) } } -#[derive(Debug, Deserialize)] -struct DeserHelper<'a>(#[serde(borrow)] Vec<&'a RawValue>); - #[cfg(test)] mod test { use super::*; + fn assert_invalid_json(batch: &'static str) { + let bytes = Bytes::from(batch); + let err = InboundData::try_from(bytes).unwrap_err(); + + assert!(matches!(err, RequestError::InvalidJson(_))); + } + #[test] fn test_deser_batch() { let batch = r#"[ @@ -121,10 +138,10 @@ mod test { #[test] fn test_deser_single_with_whitespace() { - let single = r#" - - {"id": 1, "method": "foo", "params": [1, 2, 3]} - + let single = r#" + + {"id": 1, "method": "foo", "params": [1, 2, 3]} + "#; let bytes = Bytes::from(single); @@ -133,4 +150,54 @@ mod test { assert_eq!(batch.len(), 1); assert!(batch.single()); } + + #[test] + fn test_broken_batch() { + let batch = r#"[ + {"id": 1, "method": "foo", "params": [1, 2, 3]}, + {"id": 2, "method": "bar", "params": [4, 5, 6] + ]"#; + + assert_invalid_json(batch); + } + + #[test] + fn test_junk_prefix() { + let batch = r#"JUNK[ + {"id": 1, "method": "foo", "params": [1, 2, 3]}, + {"id": 2, "method": "bar", "params": [4, 5, 6]} + ]"#; + + assert_invalid_json(batch); + } + + #[test] + fn test_junk_suffix() { + let batch = r#"[ + {"id": 1, "method": "foo", "params": [1, 2, 3]}, + {"id": 2, "method": "bar", "params": [4, 5, 6]} + ]JUNK"#; + + assert_invalid_json(batch); + } + + #[test] + fn test_invalid_utf8_prefix() { + let batch = r#"\xF1\x80[ + {"id": 1, "method": "foo", "params": [1, 2, 3]}, + {"id": 2, "method": "bar", "params": [4, 5, 6]} + ]"#; + + assert_invalid_json(batch); + } + + #[test] + fn test_invalid_utf8_suffix() { + let batch = r#"[ + {"id": 1, "method": "foo", "params": [1, 2, 3]}, + {"id": 2, "method": "bar", "params": [4, 5, 6]} + ]\xF1\x80"#; + + assert_invalid_json(batch); + } } From a65e9f477bf81fb9a2c31319d94af1af83c05ed1 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 17 Apr 2025 11:15:18 -0400 Subject: [PATCH 5/5] chore: bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2b004f5..f9cf33c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ description = "Simple, modern, ergonomic JSON-RPC 2.0 router built with tower an keywords = ["json-rpc", "jsonrpc", "json"] categories = ["web-programming::http-server", "web-programming::websocket"] -version = "0.3.3" +version = "0.3.4" edition = "2021" rust-version = "1.81" authors = ["init4", "James Prestwich"]